diff --git a/include/llvm/Target/TargetSchedule.td b/include/llvm/Target/TargetSchedule.td index 64ab4f0b09a..96c83674cb0 100644 --- a/include/llvm/Target/TargetSchedule.td +++ b/include/llvm/Target/TargetSchedule.td @@ -84,11 +84,12 @@ class InstrItinData stages, // Processor itineraries - These values represent the set of all itinerary // classes for a given chip set. // -class ProcessorItineraries iid> { +class ProcessorItineraries fu, list iid> { + list FU = fu; list IID = iid; } // NoItineraries - A marker that can be used by processors without schedule // info. -def NoItineraries : ProcessorItineraries<[]>; +def NoItineraries : ProcessorItineraries<[], []>; diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td index 1c96976d6c4..b60ccca4686 100644 --- a/lib/Target/ARM/ARMSchedule.td +++ b/lib/Target/ARM/ARMSchedule.td @@ -7,19 +7,6 @@ // //===----------------------------------------------------------------------===// -//===----------------------------------------------------------------------===// -// Functional units across ARM processors -// -def FU_Issue : FuncUnit; // issue -def FU_Pipe0 : FuncUnit; // pipeline 0 -def FU_Pipe1 : FuncUnit; // pipeline 1 -def FU_LdSt0 : FuncUnit; // pipeline 0 load/store -def FU_LdSt1 : FuncUnit; // pipeline 1 load/store -def FU_NPipe : FuncUnit; // NEON ALU/MUL pipe -def FU_NLSPipe : FuncUnit; // NEON LS pipe -def FU_DRegsVFP: FuncUnit; // FP register set, VFP side -def FU_DRegsN : FuncUnit; // FP register set, NEON side - //===----------------------------------------------------------------------===// // Instruction Itinerary classes used for ARM // @@ -165,8 +152,7 @@ def IIC_VTBX4 : InstrItinClass; //===----------------------------------------------------------------------===// // Processor instruction itineraries. -def GenericItineraries : ProcessorItineraries<[]>; - +def GenericItineraries : ProcessorItineraries<[], []>; include "ARMScheduleV6.td" include "ARMScheduleA8.td" diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td index d43a9c7643d..bbfc0b2b474 100644 --- a/lib/Target/ARM/ARMScheduleA8.td +++ b/lib/Target/ARM/ARMScheduleA8.td @@ -13,156 +13,164 @@ // // Scheduling information derived from "Cortex-A8 Technical Reference Manual". +// Functional Units. +def A8_Issue : FuncUnit; // issue +def A8_Pipe0 : FuncUnit; // pipeline 0 +def A8_Pipe1 : FuncUnit; // pipeline 1 +def A8_LdSt0 : FuncUnit; // pipeline 0 load/store +def A8_LdSt1 : FuncUnit; // pipeline 1 load/store +def A8_NPipe : FuncUnit; // NEON ALU/MUL pipe +def A8_NLSPipe : FuncUnit; // NEON LS pipe // -// Dual issue pipeline represented by FU_Pipe0 | FU_Pipe1 +// Dual issue pipeline represented by A8_Pipe0 | A8_Pipe1 // -def CortexA8Itineraries : ProcessorItineraries<[ - +def CortexA8Itineraries : ProcessorItineraries< + [A8_Issue, A8_Pipe0, A8_Pipe1, A8_LdSt0, A8_LdSt1, A8_NPipe, A8_NLSPipe], [ // Two fully-pipelined integer ALU pipelines // // No operand cycles - InstrItinData]>, + InstrItinData]>, // // Binary Instructions that produce a result - InstrItinData], [2, 2]>, - InstrItinData], [2, 2, 2]>, - InstrItinData], [2, 2, 1]>, - InstrItinData], [2, 2, 1, 1]>, + InstrItinData], [2, 2]>, + InstrItinData], [2, 2, 2]>, + InstrItinData], [2, 2, 1]>, + InstrItinData], [2, 2, 1, 1]>, // // Unary Instructions that produce a result - InstrItinData], [2, 2]>, - InstrItinData], [2, 1]>, - InstrItinData], [2, 1, 1]>, + InstrItinData], [2, 2]>, + InstrItinData], [2, 1]>, + InstrItinData], [2, 1, 1]>, // // Compare instructions - InstrItinData], [2]>, - InstrItinData], [2, 2]>, - InstrItinData], [2, 1]>, - InstrItinData], [2, 1, 1]>, + InstrItinData], [2]>, + InstrItinData], [2, 2]>, + InstrItinData], [2, 1]>, + InstrItinData], [2, 1, 1]>, // // Move instructions, unconditional - InstrItinData], [1]>, - InstrItinData], [1, 1]>, - InstrItinData], [1, 1]>, - InstrItinData], [1, 1, 1]>, + InstrItinData], [1]>, + InstrItinData], [1, 1]>, + InstrItinData], [1, 1]>, + InstrItinData], [1, 1, 1]>, // // Move instructions, conditional - InstrItinData], [2]>, - InstrItinData], [2, 1]>, - InstrItinData], [2, 1]>, - InstrItinData], [2, 1, 1]>, + InstrItinData], [2]>, + InstrItinData], [2, 1]>, + InstrItinData], [2, 1]>, + InstrItinData], [2, 1, 1]>, // Integer multiply pipeline // Result written in E5, but that is relative to the last cycle of multicycle, // so we use 6 for those cases // - InstrItinData], [5, 1, 1]>, - InstrItinData, - InstrStage<2, [FU_Pipe0]>], [6, 1, 1, 4]>, - InstrItinData, - InstrStage<2, [FU_Pipe0]>], [6, 1, 1]>, - InstrItinData, - InstrStage<2, [FU_Pipe0]>], [6, 1, 1, 4]>, - InstrItinData, - InstrStage<3, [FU_Pipe0]>], [6, 6, 1, 1]>, - InstrItinData, - InstrStage<3, [FU_Pipe0]>], [6, 6, 1, 1]>, + InstrItinData], [5, 1, 1]>, + InstrItinData, + InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>, + InstrItinData, + InstrStage<2, [A8_Pipe0]>], [6, 1, 1]>, + InstrItinData, + InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>, + InstrItinData, + InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>, + InstrItinData, + InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>, // Integer load pipeline // // loads have an extra cycle of latency, but are fully pipelined - // use FU_Issue to enforce the 1 load/store per cycle limit + // use A8_Issue to enforce the 1 load/store per cycle limit // // Immediate offset - InstrItinData, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [3, 1]>, + InstrItinData, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 1]>, // // Register offset - InstrItinData, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [3, 1, 1]>, + InstrItinData, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>, // // Scaled register offset, issues over 2 cycles - InstrItinData, - InstrStage<1, [FU_Pipe0], 0>, - InstrStage<1, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [4, 1, 1]>, + InstrItinData, + InstrStage<1, [A8_Pipe0], 0>, + InstrStage<1, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [4, 1, 1]>, // // Immediate offset with update - InstrItinData, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [3, 2, 1]>, + InstrItinData, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 2, 1]>, // // Register offset with update - InstrItinData, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [3, 2, 1, 1]>, + InstrItinData, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 2, 1, 1]>, // // Scaled register offset with update, issues over 2 cycles - InstrItinData, - InstrStage<1, [FU_Pipe0], 0>, - InstrStage<1, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [4, 3, 1, 1]>, + InstrItinData, + InstrStage<1, [A8_Pipe0], 0>, + InstrStage<1, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [4, 3, 1, 1]>, // // Load multiple - InstrItinData, - InstrStage<2, [FU_Pipe0], 0>, - InstrStage<2, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>]>, + InstrItinData, + InstrStage<2, [A8_Pipe0], 0>, + InstrStage<2, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>]>, // Integer store pipeline // - // use FU_Issue to enforce the 1 load/store per cycle limit + // use A8_Issue to enforce the 1 load/store per cycle limit // // Immediate offset - InstrItinData, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [3, 1]>, + InstrItinData, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 1]>, // // Register offset - InstrItinData, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [3, 1, 1]>, + InstrItinData, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>, // // Scaled register offset, issues over 2 cycles - InstrItinData, - InstrStage<1, [FU_Pipe0], 0>, - InstrStage<1, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [3, 1, 1]>, + InstrItinData, + InstrStage<1, [A8_Pipe0], 0>, + InstrStage<1, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>, // // Immediate offset with update - InstrItinData, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [2, 3, 1]>, + InstrItinData, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [2, 3, 1]>, // // Register offset with update - InstrItinData, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [2, 3, 1, 1]>, + InstrItinData, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [2, 3, 1, 1]>, // // Scaled register offset with update, issues over 2 cycles - InstrItinData, - InstrStage<1, [FU_Pipe0], 0>, - InstrStage<1, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [3, 3, 1, 1]>, + InstrItinData, + InstrStage<1, [A8_Pipe0], 0>, + InstrStage<1, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 3, 1, 1]>, // // Store multiple - InstrItinData, - InstrStage<2, [FU_Pipe0], 0>, - InstrStage<2, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>]>, + InstrItinData, + InstrStage<2, [A8_Pipe0], 0>, + InstrStage<2, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>]>, // Branch // // no delay slots, so the latency of a branch is unimportant - InstrItinData]>, + InstrItinData]>, // VFP // Issue through integer pipeline, and execute in NEON unit. We assume @@ -170,441 +178,441 @@ def CortexA8Itineraries : ProcessorItineraries<[ // possible. // // FP Special Register to Integer Register File Move - InstrItinData, - InstrStage<1, [FU_NLSPipe]>]>, + InstrItinData, + InstrStage<1, [A8_NLSPipe]>]>, // // Single-precision FP Unary - InstrItinData, - InstrStage<1, [FU_NPipe]>], [7, 1]>, + InstrItinData, + InstrStage<1, [A8_NPipe]>], [7, 1]>, // // Double-precision FP Unary - InstrItinData, - InstrStage<4, [FU_NPipe], 0>, - InstrStage<4, [FU_NLSPipe]>], [4, 1]>, + InstrItinData, + InstrStage<4, [A8_NPipe], 0>, + InstrStage<4, [A8_NLSPipe]>], [4, 1]>, // // Single-precision FP Compare - InstrItinData, - InstrStage<1, [FU_NPipe]>], [1, 1]>, + InstrItinData, + InstrStage<1, [A8_NPipe]>], [1, 1]>, // // Double-precision FP Compare - InstrItinData, - InstrStage<4, [FU_NPipe], 0>, - InstrStage<4, [FU_NLSPipe]>], [4, 1]>, + InstrItinData, + InstrStage<4, [A8_NPipe], 0>, + InstrStage<4, [A8_NLSPipe]>], [4, 1]>, // // Single to Double FP Convert - InstrItinData, - InstrStage<7, [FU_NPipe], 0>, - InstrStage<7, [FU_NLSPipe]>], [7, 1]>, + InstrItinData, + InstrStage<7, [A8_NPipe], 0>, + InstrStage<7, [A8_NLSPipe]>], [7, 1]>, // // Double to Single FP Convert - InstrItinData, - InstrStage<5, [FU_NPipe], 0>, - InstrStage<5, [FU_NLSPipe]>], [5, 1]>, + InstrItinData, + InstrStage<5, [A8_NPipe], 0>, + InstrStage<5, [A8_NLSPipe]>], [5, 1]>, // // Single-Precision FP to Integer Convert - InstrItinData, - InstrStage<1, [FU_NPipe]>], [7, 1]>, + InstrItinData, + InstrStage<1, [A8_NPipe]>], [7, 1]>, // // Double-Precision FP to Integer Convert - InstrItinData, - InstrStage<8, [FU_NPipe], 0>, - InstrStage<8, [FU_NLSPipe]>], [8, 1]>, + InstrItinData, + InstrStage<8, [A8_NPipe], 0>, + InstrStage<8, [A8_NLSPipe]>], [8, 1]>, // // Integer to Single-Precision FP Convert - InstrItinData, - InstrStage<1, [FU_NPipe]>], [7, 1]>, + InstrItinData, + InstrStage<1, [A8_NPipe]>], [7, 1]>, // // Integer to Double-Precision FP Convert - InstrItinData, - InstrStage<8, [FU_NPipe], 0>, - InstrStage<8, [FU_NLSPipe]>], [8, 1]>, + InstrItinData, + InstrStage<8, [A8_NPipe], 0>, + InstrStage<8, [A8_NLSPipe]>], [8, 1]>, // // Single-precision FP ALU - InstrItinData, - InstrStage<1, [FU_NPipe]>], [7, 1, 1]>, + InstrItinData, + InstrStage<1, [A8_NPipe]>], [7, 1, 1]>, // // Double-precision FP ALU - InstrItinData, - InstrStage<9, [FU_NPipe], 0>, - InstrStage<9, [FU_NLSPipe]>], [9, 1, 1]>, + InstrItinData, + InstrStage<9, [A8_NPipe], 0>, + InstrStage<9, [A8_NLSPipe]>], [9, 1, 1]>, // // Single-precision FP Multiply - InstrItinData, - InstrStage<1, [FU_NPipe]>], [7, 1, 1]>, + InstrItinData, + InstrStage<1, [A8_NPipe]>], [7, 1, 1]>, // // Double-precision FP Multiply - InstrItinData, - InstrStage<11, [FU_NPipe], 0>, - InstrStage<11, [FU_NLSPipe]>], [11, 1, 1]>, + InstrItinData, + InstrStage<11, [A8_NPipe], 0>, + InstrStage<11, [A8_NLSPipe]>], [11, 1, 1]>, // // Single-precision FP MAC - InstrItinData, - InstrStage<1, [FU_NPipe]>], [7, 2, 1, 1]>, + InstrItinData, + InstrStage<1, [A8_NPipe]>], [7, 2, 1, 1]>, // // Double-precision FP MAC - InstrItinData, - InstrStage<19, [FU_NPipe], 0>, - InstrStage<19, [FU_NLSPipe]>], [19, 2, 1, 1]>, + InstrItinData, + InstrStage<19, [A8_NPipe], 0>, + InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>, // // Single-precision FP DIV - InstrItinData, - InstrStage<20, [FU_NPipe], 0>, - InstrStage<20, [FU_NLSPipe]>], [20, 1, 1]>, + InstrItinData, + InstrStage<20, [A8_NPipe], 0>, + InstrStage<20, [A8_NLSPipe]>], [20, 1, 1]>, // // Double-precision FP DIV - InstrItinData, - InstrStage<29, [FU_NPipe], 0>, - InstrStage<29, [FU_NLSPipe]>], [29, 1, 1]>, + InstrItinData, + InstrStage<29, [A8_NPipe], 0>, + InstrStage<29, [A8_NLSPipe]>], [29, 1, 1]>, // // Single-precision FP SQRT - InstrItinData, - InstrStage<19, [FU_NPipe], 0>, - InstrStage<19, [FU_NLSPipe]>], [19, 1]>, + InstrItinData, + InstrStage<19, [A8_NPipe], 0>, + InstrStage<19, [A8_NLSPipe]>], [19, 1]>, // // Double-precision FP SQRT - InstrItinData, - InstrStage<29, [FU_NPipe], 0>, - InstrStage<29, [FU_NLSPipe]>], [29, 1]>, + InstrItinData, + InstrStage<29, [A8_NPipe], 0>, + InstrStage<29, [A8_NLSPipe]>], [29, 1]>, // // Single-precision FP Load - // use FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>]>, + // use A8_Issue to enforce the 1 load/store per cycle limit + InstrItinData, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>]>, // // Double-precision FP Load - // use FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData, - InstrStage<1, [FU_Pipe0], 0>, - InstrStage<1, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>]>, + // use A8_Issue to enforce the 1 load/store per cycle limit + InstrItinData, + InstrStage<1, [A8_Pipe0], 0>, + InstrStage<1, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>]>, // // FP Load Multiple - // use FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData, - InstrStage<2, [FU_Pipe0], 0>, - InstrStage<2, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>]>, + // use A8_Issue to enforce the 1 load/store per cycle limit + InstrItinData, + InstrStage<2, [A8_Pipe0], 0>, + InstrStage<2, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>]>, // // Single-precision FP Store - // use FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>]>, + // use A8_Issue to enforce the 1 load/store per cycle limit + InstrItinData, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>]>, // // Double-precision FP Store - // use FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData, - InstrStage<1, [FU_Pipe0], 0>, - InstrStage<1, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>]>, + // use A8_Issue to enforce the 1 load/store per cycle limit + InstrItinData, + InstrStage<1, [A8_Pipe0], 0>, + InstrStage<1, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>]>, // // FP Store Multiple - // use FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData, - InstrStage<2, [FU_Pipe0], 0>, - InstrStage<2, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>]>, + // use A8_Issue to enforce the 1 load/store per cycle limit + InstrItinData, + InstrStage<2, [A8_Pipe0], 0>, + InstrStage<2, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>]>, // NEON // Issue through integer pipeline, and execute in NEON unit. // // VLD1 // FIXME: We don't model this instruction properly - InstrItinData, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>]>, + InstrItinData, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>]>, // // VLD2 // FIXME: We don't model this instruction properly - InstrItinData, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>], [2, 2, 1]>, + InstrItinData, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>], [2, 2, 1]>, // // VLD3 // FIXME: We don't model this instruction properly - InstrItinData, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>], [2, 2, 2, 1]>, + InstrItinData, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>], [2, 2, 2, 1]>, // // VLD4 // FIXME: We don't model this instruction properly - InstrItinData, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>], [2, 2, 2, 2, 1]>, + InstrItinData, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>], [2, 2, 2, 2, 1]>, // // VST // FIXME: We don't model this instruction properly - InstrItinData, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>]>, + InstrItinData, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>]>, // // Double-register FP Unary - InstrItinData, - InstrStage<1, [FU_NPipe]>], [5, 2]>, + InstrItinData, + InstrStage<1, [A8_NPipe]>], [5, 2]>, // // Quad-register FP Unary // Result written in N5, but that is relative to the last cycle of multicycle, // so we use 6 for those cases - InstrItinData, - InstrStage<2, [FU_NPipe]>], [6, 2]>, + InstrItinData, + InstrStage<2, [A8_NPipe]>], [6, 2]>, // // Double-register FP Binary - InstrItinData, - InstrStage<1, [FU_NPipe]>], [5, 2, 2]>, + InstrItinData, + InstrStage<1, [A8_NPipe]>], [5, 2, 2]>, // // Quad-register FP Binary // Result written in N5, but that is relative to the last cycle of multicycle, // so we use 6 for those cases - InstrItinData, - InstrStage<2, [FU_NPipe]>], [6, 2, 2]>, + InstrItinData, + InstrStage<2, [A8_NPipe]>], [6, 2, 2]>, // // Move Immediate - InstrItinData, - InstrStage<1, [FU_NPipe]>], [3]>, + InstrItinData, + InstrStage<1, [A8_NPipe]>], [3]>, // // Double-register Permute Move - InstrItinData, - InstrStage<1, [FU_NLSPipe]>], [2, 1]>, + InstrItinData, + InstrStage<1, [A8_NLSPipe]>], [2, 1]>, // // Quad-register Permute Move // Result written in N2, but that is relative to the last cycle of multicycle, // so we use 3 for those cases - InstrItinData, - InstrStage<2, [FU_NLSPipe]>], [3, 1]>, + InstrItinData, + InstrStage<2, [A8_NLSPipe]>], [3, 1]>, // // Integer to Single-precision Move - InstrItinData, - InstrStage<1, [FU_NLSPipe]>], [2, 1]>, + InstrItinData, + InstrStage<1, [A8_NLSPipe]>], [2, 1]>, // // Integer to Double-precision Move - InstrItinData, - InstrStage<1, [FU_NLSPipe]>], [2, 1, 1]>, + InstrItinData, + InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>, // // Single-precision to Integer Move - InstrItinData, - InstrStage<1, [FU_NLSPipe]>], [20, 1]>, + InstrItinData, + InstrStage<1, [A8_NLSPipe]>], [20, 1]>, // // Double-precision to Integer Move - InstrItinData, - InstrStage<1, [FU_NLSPipe]>], [20, 20, 1]>, + InstrItinData, + InstrStage<1, [A8_NLSPipe]>], [20, 20, 1]>, // // Integer to Lane Move - InstrItinData, - InstrStage<2, [FU_NLSPipe]>], [3, 1, 1]>, + InstrItinData, + InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>, // // Double-register Permute - InstrItinData, - InstrStage<1, [FU_NLSPipe]>], [2, 2, 1, 1]>, + InstrItinData, + InstrStage<1, [A8_NLSPipe]>], [2, 2, 1, 1]>, // // Quad-register Permute // Result written in N2, but that is relative to the last cycle of multicycle, // so we use 3 for those cases - InstrItinData, - InstrStage<2, [FU_NLSPipe]>], [3, 3, 1, 1]>, + InstrItinData, + InstrStage<2, [A8_NLSPipe]>], [3, 3, 1, 1]>, // // Quad-register Permute (3 cycle issue) // Result written in N2, but that is relative to the last cycle of multicycle, // so we use 4 for those cases - InstrItinData, - InstrStage<1, [FU_NLSPipe]>, - InstrStage<1, [FU_NPipe], 0>, - InstrStage<2, [FU_NLSPipe]>], [4, 4, 1, 1]>, + InstrItinData, + InstrStage<1, [A8_NLSPipe]>, + InstrStage<1, [A8_NPipe], 0>, + InstrStage<2, [A8_NLSPipe]>], [4, 4, 1, 1]>, // // Double-register FP Multiple-Accumulate - InstrItinData, - InstrStage<1, [FU_NPipe]>], [9, 3, 2, 2]>, + InstrItinData, + InstrStage<1, [A8_NPipe]>], [9, 3, 2, 2]>, // // Quad-register FP Multiple-Accumulate // Result written in N9, but that is relative to the last cycle of multicycle, // so we use 10 for those cases - InstrItinData, - InstrStage<2, [FU_NPipe]>], [10, 3, 2, 2]>, + InstrItinData, + InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>, // // Double-register Reciprical Step - InstrItinData, - InstrStage<1, [FU_NPipe]>], [9, 2, 2]>, + InstrItinData, + InstrStage<1, [A8_NPipe]>], [9, 2, 2]>, // // Quad-register Reciprical Step - InstrItinData, - InstrStage<2, [FU_NPipe]>], [10, 2, 2]>, + InstrItinData, + InstrStage<2, [A8_NPipe]>], [10, 2, 2]>, // // Double-register Integer Count - InstrItinData, - InstrStage<1, [FU_NPipe]>], [3, 2, 2]>, + InstrItinData, + InstrStage<1, [A8_NPipe]>], [3, 2, 2]>, // // Quad-register Integer Count // Result written in N3, but that is relative to the last cycle of multicycle, // so we use 4 for those cases - InstrItinData, - InstrStage<2, [FU_NPipe]>], [4, 2, 2]>, + InstrItinData, + InstrStage<2, [A8_NPipe]>], [4, 2, 2]>, // // Double-register Integer Unary - InstrItinData, - InstrStage<1, [FU_NPipe]>], [4, 2]>, + InstrItinData, + InstrStage<1, [A8_NPipe]>], [4, 2]>, // // Quad-register Integer Unary - InstrItinData, - InstrStage<1, [FU_NPipe]>], [4, 2]>, + InstrItinData, + InstrStage<1, [A8_NPipe]>], [4, 2]>, // // Double-register Integer Q-Unary - InstrItinData, - InstrStage<1, [FU_NPipe]>], [4, 1]>, + InstrItinData, + InstrStage<1, [A8_NPipe]>], [4, 1]>, // // Quad-register Integer CountQ-Unary - InstrItinData, - InstrStage<1, [FU_NPipe]>], [4, 1]>, + InstrItinData, + InstrStage<1, [A8_NPipe]>], [4, 1]>, // // Double-register Integer Binary - InstrItinData, - InstrStage<1, [FU_NPipe]>], [3, 2, 2]>, + InstrItinData, + InstrStage<1, [A8_NPipe]>], [3, 2, 2]>, // // Quad-register Integer Binary - InstrItinData, - InstrStage<1, [FU_NPipe]>], [3, 2, 2]>, + InstrItinData, + InstrStage<1, [A8_NPipe]>], [3, 2, 2]>, // // Double-register Integer Binary (4 cycle) - InstrItinData, - InstrStage<1, [FU_NPipe]>], [4, 2, 1]>, + InstrItinData, + InstrStage<1, [A8_NPipe]>], [4, 2, 1]>, // // Quad-register Integer Binary (4 cycle) - InstrItinData, - InstrStage<1, [FU_NPipe]>], [4, 2, 1]>, + InstrItinData, + InstrStage<1, [A8_NPipe]>], [4, 2, 1]>, // // Double-register Integer Subtract - InstrItinData, - InstrStage<1, [FU_NPipe]>], [3, 2, 1]>, + InstrItinData, + InstrStage<1, [A8_NPipe]>], [3, 2, 1]>, // // Quad-register Integer Subtract - InstrItinData, - InstrStage<1, [FU_NPipe]>], [3, 2, 1]>, + InstrItinData, + InstrStage<1, [A8_NPipe]>], [3, 2, 1]>, // // Double-register Integer Subtract - InstrItinData, - InstrStage<1, [FU_NPipe]>], [4, 2, 1]>, + InstrItinData, + InstrStage<1, [A8_NPipe]>], [4, 2, 1]>, // // Quad-register Integer Subtract - InstrItinData, - InstrStage<1, [FU_NPipe]>], [4, 2, 1]>, + InstrItinData, + InstrStage<1, [A8_NPipe]>], [4, 2, 1]>, // // Double-register Integer Shift - InstrItinData, - InstrStage<1, [FU_NPipe]>], [3, 1, 1]>, + InstrItinData, + InstrStage<1, [A8_NPipe]>], [3, 1, 1]>, // // Quad-register Integer Shift - InstrItinData, - InstrStage<2, [FU_NPipe]>], [4, 1, 1]>, + InstrItinData, + InstrStage<2, [A8_NPipe]>], [4, 1, 1]>, // // Double-register Integer Shift (4 cycle) - InstrItinData, - InstrStage<1, [FU_NPipe]>], [4, 1, 1]>, + InstrItinData, + InstrStage<1, [A8_NPipe]>], [4, 1, 1]>, // // Quad-register Integer Shift (4 cycle) - InstrItinData, - InstrStage<2, [FU_NPipe]>], [5, 1, 1]>, + InstrItinData, + InstrStage<2, [A8_NPipe]>], [5, 1, 1]>, // // Double-register Integer Pair Add Long - InstrItinData, - InstrStage<1, [FU_NPipe]>], [6, 3, 1]>, + InstrItinData, + InstrStage<1, [A8_NPipe]>], [6, 3, 1]>, // // Quad-register Integer Pair Add Long - InstrItinData, - InstrStage<2, [FU_NPipe]>], [7, 3, 1]>, + InstrItinData, + InstrStage<2, [A8_NPipe]>], [7, 3, 1]>, // // Double-register Absolute Difference and Accumulate - InstrItinData, - InstrStage<1, [FU_NPipe]>], [6, 3, 2, 1]>, + InstrItinData, + InstrStage<1, [A8_NPipe]>], [6, 3, 2, 1]>, // // Quad-register Absolute Difference and Accumulate - InstrItinData, - InstrStage<2, [FU_NPipe]>], [6, 3, 2, 1]>, + InstrItinData, + InstrStage<2, [A8_NPipe]>], [6, 3, 2, 1]>, // // Double-register Integer Multiply (.8, .16) - InstrItinData, - InstrStage<1, [FU_NPipe]>], [6, 2, 2]>, + InstrItinData, + InstrStage<1, [A8_NPipe]>], [6, 2, 2]>, // // Double-register Integer Multiply (.32) - InstrItinData, - InstrStage<2, [FU_NPipe]>], [7, 2, 1]>, + InstrItinData, + InstrStage<2, [A8_NPipe]>], [7, 2, 1]>, // // Quad-register Integer Multiply (.8, .16) - InstrItinData, - InstrStage<2, [FU_NPipe]>], [7, 2, 2]>, + InstrItinData, + InstrStage<2, [A8_NPipe]>], [7, 2, 2]>, // // Quad-register Integer Multiply (.32) - InstrItinData, - InstrStage<1, [FU_NPipe]>, - InstrStage<2, [FU_NLSPipe], 0>, - InstrStage<3, [FU_NPipe]>], [9, 2, 1]>, + InstrItinData, + InstrStage<1, [A8_NPipe]>, + InstrStage<2, [A8_NLSPipe], 0>, + InstrStage<3, [A8_NPipe]>], [9, 2, 1]>, // // Double-register Integer Multiply-Accumulate (.8, .16) - InstrItinData, - InstrStage<1, [FU_NPipe]>], [6, 3, 2, 2]>, + InstrItinData, + InstrStage<1, [A8_NPipe]>], [6, 3, 2, 2]>, // // Double-register Integer Multiply-Accumulate (.32) - InstrItinData, - InstrStage<2, [FU_NPipe]>], [7, 3, 2, 1]>, + InstrItinData, + InstrStage<2, [A8_NPipe]>], [7, 3, 2, 1]>, // // Quad-register Integer Multiply-Accumulate (.8, .16) - InstrItinData, - InstrStage<2, [FU_NPipe]>], [7, 3, 2, 2]>, + InstrItinData, + InstrStage<2, [A8_NPipe]>], [7, 3, 2, 2]>, // // Quad-register Integer Multiply-Accumulate (.32) - InstrItinData, - InstrStage<1, [FU_NPipe]>, - InstrStage<2, [FU_NLSPipe], 0>, - InstrStage<3, [FU_NPipe]>], [9, 3, 2, 1]>, + InstrItinData, + InstrStage<1, [A8_NPipe]>, + InstrStage<2, [A8_NLSPipe], 0>, + InstrStage<3, [A8_NPipe]>], [9, 3, 2, 1]>, // // Double-register VEXT - InstrItinData, - InstrStage<1, [FU_NLSPipe]>], [2, 1, 1]>, + InstrItinData, + InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>, // // Quad-register VEXT - InstrItinData, - InstrStage<2, [FU_NLSPipe]>], [3, 1, 1]>, + InstrItinData, + InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>, // // VTB - InstrItinData, - InstrStage<2, [FU_NLSPipe]>], [3, 2, 1]>, - InstrItinData, - InstrStage<2, [FU_NLSPipe]>], [3, 2, 2, 1]>, - InstrItinData, - InstrStage<1, [FU_NLSPipe]>, - InstrStage<1, [FU_NPipe], 0>, - InstrStage<2, [FU_NLSPipe]>], [4, 2, 2, 3, 1]>, - InstrItinData, - InstrStage<1, [FU_NLSPipe]>, - InstrStage<1, [FU_NPipe], 0>, - InstrStage<2, [FU_NLSPipe]>], [4, 2, 2, 3, 3, 1]>, + InstrItinData, + InstrStage<2, [A8_NLSPipe]>], [3, 2, 1]>, + InstrItinData, + InstrStage<2, [A8_NLSPipe]>], [3, 2, 2, 1]>, + InstrItinData, + InstrStage<1, [A8_NLSPipe]>, + InstrStage<1, [A8_NPipe], 0>, + InstrStage<2, [A8_NLSPipe]>], [4, 2, 2, 3, 1]>, + InstrItinData, + InstrStage<1, [A8_NLSPipe]>, + InstrStage<1, [A8_NPipe], 0>, + InstrStage<2, [A8_NLSPipe]>], [4, 2, 2, 3, 3, 1]>, // // VTBX - InstrItinData, - InstrStage<2, [FU_NLSPipe]>], [3, 1, 2, 1]>, - InstrItinData, - InstrStage<2, [FU_NLSPipe]>], [3, 1, 2, 2, 1]>, - InstrItinData, - InstrStage<1, [FU_NLSPipe]>, - InstrStage<1, [FU_NPipe], 0>, - InstrStage<2, [FU_NLSPipe]>], [4, 1, 2, 2, 3, 1]>, - InstrItinData, - InstrStage<1, [FU_NLSPipe]>, - InstrStage<1, [FU_NPipe], 0>, - InstrStage<2, [FU_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]> + InstrItinData, + InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 1]>, + InstrItinData, + InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 2, 1]>, + InstrItinData, + InstrStage<1, [A8_NLSPipe]>, + InstrStage<1, [A8_NPipe], 0>, + InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 1]>, + InstrItinData, + InstrStage<1, [A8_NLSPipe]>, + InstrStage<1, [A8_NPipe], 0>, + InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]> ]>; diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td index 5027caee974..75320d92995 100644 --- a/lib/Target/ARM/ARMScheduleA9.td +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -15,9 +15,19 @@ // Ad-hoc scheduling information derived from pretty vague "Cortex-A9 Technical // Reference Manual". // -// Dual issue pipeline represented by FU_Pipe0 | FU_Pipe1 +// Functional units +def A9_Issue : FuncUnit; // issue +def A9_Pipe0 : FuncUnit; // pipeline 0 +def A9_Pipe1 : FuncUnit; // pipeline 1 +def A9_LSPipe : FuncUnit; // LS pipe +def A9_NPipe : FuncUnit; // NEON ALU/MUL pipe +def A9_DRegsVFP: FuncUnit; // FP register set, VFP side +def A9_DRegsN : FuncUnit; // FP register set, NEON side + +// Dual issue pipeline represented by A9_Pipe0 | A9_Pipe1 // -def CortexA9Itineraries : ProcessorItineraries<[ +def CortexA9Itineraries : ProcessorItineraries< + [A9_NPipe, A9_DRegsN, A9_DRegsVFP, A9_LSPipe, A9_Pipe0, A9_Pipe1, A9_Issue], [ // VFP and NEON shares the same register file. This means that every VFP // instruction should wait for full completion of the consecutive NEON // instruction and vice-versa. We model this behavior with two artificial FUs: @@ -36,704 +46,704 @@ def CortexA9Itineraries : ProcessorItineraries<[ // Issue through integer pipeline, and execute in NEON unit. // FP Special Register to Integer Register File Move - InstrItinData, - InstrStage<2, [FU_DRegsN], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>]>, + InstrItinData, + InstrStage<2, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>]>, // // Single-precision FP Unary - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 2 cycles - InstrStage<3, [FU_DRegsN], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [1, 1]>, + InstrStage<3, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [1, 1]>, // // Double-precision FP Unary - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 2 cycles - InstrStage<3, [FU_DRegsN], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [1, 1]>, + InstrStage<3, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [1, 1]>, // // Single-precision FP Compare - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 4 cycles - InstrStage<5, [FU_DRegsN], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [1, 1]>, + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [1, 1]>, // // Double-precision FP Compare - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 4 cycles - InstrStage<5, [FU_DRegsN], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [1, 1]>, + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [1, 1]>, // // Single to Double FP Convert - InstrItinData, - InstrStage<5, [FU_DRegsN], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [4, 1]>, + InstrItinData, + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1]>, // // Double to Single FP Convert - InstrItinData, - InstrStage<5, [FU_DRegsN], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [4, 1]>, + InstrItinData, + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1]>, // // Single to Half FP Convert - InstrItinData, - InstrStage<5, [FU_DRegsN], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [4, 1]>, + InstrItinData, + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1]>, // // Half to Single FP Convert - InstrItinData, - InstrStage<3, [FU_DRegsN], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [2, 1]>, + InstrItinData, + InstrStage<3, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [2, 1]>, // // Single-Precision FP to Integer Convert - InstrItinData, - InstrStage<5, [FU_DRegsN], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [4, 1]>, + InstrItinData, + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1]>, // // Double-Precision FP to Integer Convert - InstrItinData, - InstrStage<5, [FU_DRegsN], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [4, 1]>, + InstrItinData, + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1]>, // // Integer to Single-Precision FP Convert - InstrItinData, - InstrStage<5, [FU_DRegsN], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [4, 1]>, + InstrItinData, + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1]>, // // Integer to Double-Precision FP Convert - InstrItinData, - InstrStage<5, [FU_DRegsN], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [4, 1]>, + InstrItinData, + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1]>, // // Single-precision FP ALU - InstrItinData, - InstrStage<5, [FU_DRegsN], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [4, 1, 1]>, + InstrItinData, + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1, 1]>, // // Double-precision FP ALU - InstrItinData, - InstrStage<5, [FU_DRegsN], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [4, 1, 1]>, + InstrItinData, + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1, 1]>, // // Single-precision FP Multiply - InstrItinData, - InstrStage<6, [FU_DRegsN], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [5, 1, 1]>, + InstrItinData, + InstrStage<6, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [5, 1, 1]>, // // Double-precision FP Multiply - InstrItinData, - InstrStage<7, [FU_DRegsN], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [6, 1, 1]>, + InstrItinData, + InstrStage<7, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [6, 1, 1]>, // // Single-precision FP MAC - InstrItinData, - InstrStage<9, [FU_DRegsN], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [8, 0, 1, 1]>, + InstrItinData, + InstrStage<9, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [8, 0, 1, 1]>, // // Double-precision FP MAC - InstrItinData, - InstrStage<10, [FU_DRegsN], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [9, 0, 1, 1]>, + InstrItinData, + InstrStage<10, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [9, 0, 1, 1]>, // // Single-precision FP DIV - InstrItinData, - InstrStage<16, [FU_DRegsN], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<10, [FU_NPipe]>], [15, 1, 1]>, + InstrItinData, + InstrStage<16, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<10, [A9_NPipe]>], [15, 1, 1]>, // // Double-precision FP DIV - InstrItinData, - InstrStage<26, [FU_DRegsN], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<20, [FU_NPipe]>], [25, 1, 1]>, + InstrItinData, + InstrStage<26, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<20, [A9_NPipe]>], [25, 1, 1]>, // // Single-precision FP SQRT - InstrItinData, - InstrStage<18, [FU_DRegsN], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<13, [FU_NPipe]>], [17, 1]>, + InstrItinData, + InstrStage<18, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<13, [A9_NPipe]>], [17, 1]>, // // Double-precision FP SQRT - InstrItinData, - InstrStage<33, [FU_DRegsN], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<28, [FU_NPipe]>], [32, 1]>, + InstrItinData, + InstrStage<33, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<28, [A9_NPipe]>], [32, 1]>, // // Integer to Single-precision Move - InstrItinData, + InstrItinData, // Extra 1 latency cycle since wbck is 2 cycles - InstrStage<3, [FU_DRegsN], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [1, 1]>, + InstrStage<3, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [1, 1]>, // // Integer to Double-precision Move - InstrItinData, + InstrItinData, // Extra 1 latency cycle since wbck is 2 cycles - InstrStage<3, [FU_DRegsN], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [1, 1, 1]>, + InstrStage<3, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [1, 1, 1]>, // // Single-precision to Integer Move - InstrItinData, - InstrStage<2, [FU_DRegsN], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [1, 1]>, + InstrItinData, + InstrStage<2, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [1, 1]>, // // Double-precision to Integer Move - InstrItinData, - InstrStage<2, [FU_DRegsN], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [1, 1, 1]>, + InstrItinData, + InstrStage<2, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [1, 1, 1]>, // // Single-precision FP Load - // use FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData, - InstrStage<2, [FU_DRegsN], 0, Reserved>, - InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NPipe]>]>, + // use A9_Issue to enforce the 1 load/store per cycle limit + InstrItinData, + InstrStage<2, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>]>, // // Double-precision FP Load - // use FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData, - InstrStage<2, [FU_DRegsN], 0, Reserved>, - InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NPipe]>]>, + // use A9_Issue to enforce the 1 load/store per cycle limit + InstrItinData, + InstrStage<2, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>]>, // // FP Load Multiple - // use FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData, - InstrStage<2, [FU_DRegsN], 0, Reserved>, - InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NPipe]>]>, + // use A9_Issue to enforce the 1 load/store per cycle limit + InstrItinData, + InstrStage<2, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>]>, // // Single-precision FP Store - // use FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData, - InstrStage<2, [FU_DRegsN], 0, Reserved>, - InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NPipe]>]>, + // use A9_Issue to enforce the 1 load/store per cycle limit + InstrItinData, + InstrStage<2, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>]>, // // Double-precision FP Store - // use FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData, - InstrStage<2, [FU_DRegsN], 0, Reserved>, - InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NPipe]>]>, + // use A9_Issue to enforce the 1 load/store per cycle limit + InstrItinData, + InstrStage<2, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>]>, // // FP Store Multiple - // use FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData, - InstrStage<2, [FU_DRegsN], 0, Reserved>, - InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NPipe]>]>, + // use A9_Issue to enforce the 1 load/store per cycle limit + InstrItinData, + InstrStage<2, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>]>, // NEON // Issue through integer pipeline, and execute in NEON unit. // FIXME: Neon pipeline and LdSt unit are multiplexed. // Add some syntactic sugar to model this! // VLD1 // FIXME: We don't model this instruction properly - InstrItinData, - InstrStage<7, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NPipe]>]>, + InstrItinData, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>]>, // // VLD2 // FIXME: We don't model this instruction properly - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 6 cycles - InstrStage<7, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NPipe]>], [2, 2, 1]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>], [2, 2, 1]>, // // VLD3 // FIXME: We don't model this instruction properly - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 6 cycles - InstrStage<7, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NPipe]>], [2, 2, 2, 1]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>], [2, 2, 2, 1]>, // // VLD4 // FIXME: We don't model this instruction properly - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 6 cycles - InstrStage<7, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NPipe]>], [2, 2, 2, 2, 1]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>], [2, 2, 2, 2, 1]>, // // VST // FIXME: We don't model this instruction properly - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 6 cycles - InstrStage<7, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NPipe]>]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>]>, // // Double-register Integer Unary - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 6 cycles - InstrStage<7, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [4, 2]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 2]>, // // Quad-register Integer Unary - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 6 cycles - InstrStage<7, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [4, 2]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 2]>, // // Double-register Integer Q-Unary - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 6 cycles - InstrStage<7, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [4, 1]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1]>, // // Quad-register Integer CountQ-Unary - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 6 cycles - InstrStage<7, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [4, 1]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1]>, // // Double-register Integer Binary - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 6 cycles - InstrStage<7, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [3, 2, 2]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [3, 2, 2]>, // // Quad-register Integer Binary - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 6 cycles - InstrStage<7, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [3, 2, 2]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [3, 2, 2]>, // // Double-register Integer Subtract - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 6 cycles - InstrStage<7, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [3, 2, 1]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [3, 2, 1]>, // // Quad-register Integer Subtract - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 6 cycles - InstrStage<7, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [3, 2, 1]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [3, 2, 1]>, // // Double-register Integer Shift - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 6 cycles - InstrStage<7, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [3, 1, 1]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [3, 1, 1]>, // // Quad-register Integer Shift - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 6 cycles - InstrStage<7, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [3, 1, 1]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [3, 1, 1]>, // // Double-register Integer Shift (4 cycle) - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 6 cycles - InstrStage<7, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [4, 1, 1]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1, 1]>, // // Quad-register Integer Shift (4 cycle) - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 6 cycles - InstrStage<7, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [4, 1, 1]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1, 1]>, // // Double-register Integer Binary (4 cycle) - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 6 cycles - InstrStage<7, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [4, 2, 2]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 2, 2]>, // // Quad-register Integer Binary (4 cycle) - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 6 cycles - InstrStage<7, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [4, 2, 2]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 2, 2]>, // // Double-register Integer Subtract (4 cycle) - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 6 cycles - InstrStage<7, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [4, 2, 1]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 2, 1]>, // // Quad-register Integer Subtract (4 cycle) - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 6 cycles - InstrStage<7, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [4, 2, 1]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 2, 1]>, // // Double-register Integer Count - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 6 cycles - InstrStage<7, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [3, 2, 2]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [3, 2, 2]>, // // Quad-register Integer Count // Result written in N3, but that is relative to the last cycle of multicycle, // so we use 4 for those cases - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 7 cycles - InstrStage<8, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [4, 2, 2]>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [4, 2, 2]>, // // Double-register Absolute Difference and Accumulate - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 6 cycles - InstrStage<7, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [6, 3, 2, 1]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [6, 3, 2, 1]>, // // Quad-register Absolute Difference and Accumulate - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 6 cycles - InstrStage<7, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [6, 3, 2, 1]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>, // // Double-register Integer Pair Add Long - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 6 cycles - InstrStage<7, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [6, 3, 1]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [6, 3, 1]>, // // Quad-register Integer Pair Add Long - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 6 cycles - InstrStage<7, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [6, 3, 1]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [6, 3, 1]>, // // Double-register Integer Multiply (.8, .16) - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 6 cycles - InstrStage<7, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [6, 2, 2]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [6, 2, 2]>, // // Quad-register Integer Multiply (.8, .16) - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 7 cycles - InstrStage<8, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [7, 2, 2]>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [7, 2, 2]>, // // Double-register Integer Multiply (.32) - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 7 cycles - InstrStage<8, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [7, 2, 1]>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [7, 2, 1]>, // // Quad-register Integer Multiply (.32) - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 9 cycles - InstrStage<10, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<4, [FU_NPipe]>], [9, 2, 1]>, + InstrStage<10, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<4, [A9_NPipe]>], [9, 2, 1]>, // // Double-register Integer Multiply-Accumulate (.8, .16) - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 6 cycles - InstrStage<7, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [6, 3, 2, 2]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [6, 3, 2, 2]>, // // Double-register Integer Multiply-Accumulate (.32) - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 7 cycles - InstrStage<8, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [7, 3, 2, 1]>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [7, 3, 2, 1]>, // // Quad-register Integer Multiply-Accumulate (.8, .16) - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 7 cycles - InstrStage<8, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [7, 3, 2, 2]>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [7, 3, 2, 2]>, // // Quad-register Integer Multiply-Accumulate (.32) - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 9 cycles - InstrStage<10, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<4, [FU_NPipe]>], [9, 3, 2, 1]>, + InstrStage<10, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<4, [A9_NPipe]>], [9, 3, 2, 1]>, // // Move Immediate - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 6 cycles - InstrStage<7, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [3]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [3]>, // // Double-register Permute Move - InstrItinData, + InstrItinData, // FIXME: all latencies are arbitrary, no information is available - InstrStage<3, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>], [2, 1]>, + InstrStage<3, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe]>], [2, 1]>, // // Quad-register Permute Move // Result written in N2, but that is relative to the last cycle of multicycle, // so we use 3 for those cases - InstrItinData, + InstrItinData, // FIXME: all latencies are arbitrary, no information is available - InstrStage<4, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [3, 1]>, + InstrStage<4, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [3, 1]>, // // Integer to Single-precision Move - InstrItinData, + InstrItinData, // FIXME: all latencies are arbitrary, no information is available - InstrStage<3, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [2, 1]>, + InstrStage<3, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [2, 1]>, // // Integer to Double-precision Move - InstrItinData, + InstrItinData, // FIXME: all latencies are arbitrary, no information is available - InstrStage<3, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [2, 1, 1]>, + InstrStage<3, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [2, 1, 1]>, // // Single-precision to Integer Move - InstrItinData, + InstrItinData, // FIXME: all latencies are arbitrary, no information is available - InstrStage<3, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [2, 1]>, + InstrStage<3, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [2, 1]>, // // Double-precision to Integer Move - InstrItinData, + InstrItinData, // FIXME: all latencies are arbitrary, no information is available - InstrStage<3, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [2, 2, 1]>, + InstrStage<3, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [2, 2, 1]>, // // Integer to Lane Move - InstrItinData, + InstrItinData, // FIXME: all latencies are arbitrary, no information is available - InstrStage<4, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [3, 1, 1]>, + InstrStage<4, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [3, 1, 1]>, // // Double-register FP Unary - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 6 cycles - InstrStage<7, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [5, 2]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [5, 2]>, // // Quad-register FP Unary // Result written in N5, but that is relative to the last cycle of multicycle, // so we use 6 for those cases - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 7 cycles - InstrStage<8, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [6, 2]>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [6, 2]>, // // Double-register FP Binary // FIXME: We're using this itin for many instructions and [2, 2] here is too // optimistic. - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 7 cycles - InstrStage<7, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [5, 2, 2]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [5, 2, 2]>, // // Quad-register FP Binary // Result written in N5, but that is relative to the last cycle of multicycle, // so we use 6 for those cases // FIXME: We're using this itin for many instructions and [2, 2] here is too // optimistic. - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 8 cycles - InstrStage<8, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [6, 2, 2]>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [6, 2, 2]>, // // Double-register FP Multiple-Accumulate - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 7 cycles - InstrStage<8, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [6, 3, 2, 1]>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>, // // Quad-register FP Multiple-Accumulate // Result written in N9, but that is relative to the last cycle of multicycle, // so we use 10 for those cases - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 9 cycles - InstrStage<10, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<4, [FU_NPipe]>], [8, 4, 2, 1]>, + InstrStage<10, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<4, [A9_NPipe]>], [8, 4, 2, 1]>, // // Double-register Reciprical Step - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 7 cycles - InstrStage<8, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [6, 2, 2]>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [6, 2, 2]>, // // Quad-register Reciprical Step - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 9 cycles - InstrStage<10, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<4, [FU_NPipe]>], [8, 2, 2]>, + InstrStage<10, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<4, [A9_NPipe]>], [8, 2, 2]>, // // Double-register Permute - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 6 cycles - InstrStage<7, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [2, 2, 1, 1]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [2, 2, 1, 1]>, // // Quad-register Permute // Result written in N2, but that is relative to the last cycle of multicycle, // so we use 3 for those cases - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 7 cycles - InstrStage<8, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [3, 3, 1, 1]>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [3, 3, 1, 1]>, // // Quad-register Permute (3 cycle issue) // Result written in N2, but that is relative to the last cycle of multicycle, // so we use 4 for those cases - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 8 cycles - InstrStage<9, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<3, [FU_NLSPipe]>], [4, 4, 1, 1]>, + InstrStage<9, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<3, [A9_LSPipe]>], [4, 4, 1, 1]>, // // Double-register VEXT - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 7 cycles - InstrStage<7, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [2, 1, 1]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [2, 1, 1]>, // // Quad-register VEXT - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 9 cycles - InstrStage<8, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [3, 1, 1]>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [3, 1, 1]>, // // VTB - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 7 cycles - InstrStage<8, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [3, 2, 1]>, - InstrItinData, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [3, 2, 1]>, + InstrItinData, // Extra latency cycles since wbck is 7 cycles - InstrStage<8, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [3, 2, 2, 1]>, - InstrItinData, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [3, 2, 2, 1]>, + InstrItinData, // Extra latency cycles since wbck is 8 cycles - InstrStage<9, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<3, [FU_NPipe]>], [4, 2, 2, 3, 1]>, - InstrItinData, + InstrStage<9, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 1]>, + InstrItinData, // Extra latency cycles since wbck is 8 cycles - InstrStage<9, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<3, [FU_NPipe]>], [4, 2, 2, 3, 3, 1]>, + InstrStage<9, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 3, 1]>, // // VTBX - InstrItinData, + InstrItinData, // Extra latency cycles since wbck is 7 cycles - InstrStage<8, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [3, 1, 2, 1]>, - InstrItinData, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [3, 1, 2, 1]>, + InstrItinData, // Extra latency cycles since wbck is 7 cycles - InstrStage<8, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [3, 1, 2, 2, 1]>, - InstrItinData, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [3, 1, 2, 2, 1]>, + InstrItinData, // Extra latency cycles since wbck is 8 cycles - InstrStage<9, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<3, [FU_NPipe]>], [4, 1, 2, 2, 3, 1]>, - InstrItinData, + InstrStage<9, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<3, [A9_NPipe]>], [4, 1, 2, 2, 3, 1]>, + InstrItinData, // Extra latency cycles since wbck is 8 cycles - InstrStage<9, [FU_DRegsVFP], 0, Reserved>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [4, 1, 2, 2, 3, 3, 1]> + InstrStage<9, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [4, 1, 2, 2, 3, 3, 1]> ]>; diff --git a/lib/Target/ARM/ARMScheduleV6.td b/lib/Target/ARM/ARMScheduleV6.td index 0fef466ad18..f813022d874 100644 --- a/lib/Target/ARM/ARMScheduleV6.td +++ b/lib/Target/ARM/ARMScheduleV6.td @@ -13,103 +13,107 @@ // Model based on ARM1176 // +// Functional Units +def V6_Pipe : FuncUnit; // pipeline + // Scheduling information derived from "ARM1176JZF-S Technical Reference Manual". // -def ARMV6Itineraries : ProcessorItineraries<[ +def ARMV6Itineraries : ProcessorItineraries< + [V6_Pipe], [ // // No operand cycles - InstrItinData]>, + InstrItinData]>, // // Binary Instructions that produce a result - InstrItinData], [2, 2]>, - InstrItinData], [2, 2, 2]>, - InstrItinData], [2, 2, 1]>, - InstrItinData], [3, 3, 2, 1]>, + InstrItinData], [2, 2]>, + InstrItinData], [2, 2, 2]>, + InstrItinData], [2, 2, 1]>, + InstrItinData], [3, 3, 2, 1]>, // // Unary Instructions that produce a result - InstrItinData], [2, 2]>, - InstrItinData], [2, 1]>, - InstrItinData], [3, 2, 1]>, + InstrItinData], [2, 2]>, + InstrItinData], [2, 1]>, + InstrItinData], [3, 2, 1]>, // // Compare instructions - InstrItinData], [2]>, - InstrItinData], [2, 2]>, - InstrItinData], [2, 1]>, - InstrItinData], [3, 2, 1]>, + InstrItinData], [2]>, + InstrItinData], [2, 2]>, + InstrItinData], [2, 1]>, + InstrItinData], [3, 2, 1]>, // // Move instructions, unconditional - InstrItinData], [2]>, - InstrItinData], [2, 2]>, - InstrItinData], [2, 1]>, - InstrItinData], [3, 2, 1]>, + InstrItinData], [2]>, + InstrItinData], [2, 2]>, + InstrItinData], [2, 1]>, + InstrItinData], [3, 2, 1]>, // // Move instructions, conditional - InstrItinData], [3]>, - InstrItinData], [3, 2]>, - InstrItinData], [3, 1]>, - InstrItinData], [4, 2, 1]>, + InstrItinData], [3]>, + InstrItinData], [3, 2]>, + InstrItinData], [3, 1]>, + InstrItinData], [4, 2, 1]>, // Integer multiply pipeline // - InstrItinData], [4, 1, 1]>, - InstrItinData], [4, 1, 1, 2]>, - InstrItinData], [5, 1, 1]>, - InstrItinData], [5, 1, 1, 2]>, - InstrItinData], [6, 1, 1]>, - InstrItinData], [6, 1, 1, 2]>, + InstrItinData], [4, 1, 1]>, + InstrItinData], [4, 1, 1, 2]>, + InstrItinData], [5, 1, 1]>, + InstrItinData], [5, 1, 1, 2]>, + InstrItinData], [6, 1, 1]>, + InstrItinData], [6, 1, 1, 2]>, // Integer load pipeline // // Immediate offset - InstrItinData], [4, 1]>, + InstrItinData], [4, 1]>, // // Register offset - InstrItinData], [4, 1, 1]>, + InstrItinData], [4, 1, 1]>, // // Scaled register offset, issues over 2 cycles - InstrItinData], [5, 2, 1]>, + InstrItinData], [5, 2, 1]>, // // Immediate offset with update - InstrItinData], [4, 2, 1]>, + InstrItinData], [4, 2, 1]>, // // Register offset with update - InstrItinData], [4, 2, 1, 1]>, + InstrItinData], [4, 2, 1, 1]>, // // Scaled register offset with update, issues over 2 cycles - InstrItinData], [5, 2, 2, 1]>, + InstrItinData], [5, 2, 2, 1]>, // // Load multiple - InstrItinData]>, + InstrItinData]>, // Integer store pipeline // // Immediate offset - InstrItinData], [2, 1]>, + InstrItinData], [2, 1]>, // // Register offset - InstrItinData], [2, 1, 1]>, + InstrItinData], [2, 1, 1]>, // // Scaled register offset, issues over 2 cycles - InstrItinData], [2, 2, 1]>, + InstrItinData], [2, 2, 1]>, // // Immediate offset with update - InstrItinData], [2, 2, 1]>, + InstrItinData], [2, 2, 1]>, // // Register offset with update - InstrItinData], [2, 2, 1, 1]>, + InstrItinData], [2, 2, 1, 1]>, // // Scaled register offset with update, issues over 2 cycles - InstrItinData], [2, 2, 2, 1]>, + InstrItinData], [2, 2, 2, 1]>, // // Store multiple - InstrItinData]>, + InstrItinData]>, // Branch // // no delay slots, so the latency of a branch is unimportant - InstrItinData]>, + InstrItinData]>, // VFP // Issue through integer pipeline, and execute in NEON unit. We assume @@ -117,84 +121,84 @@ def ARMV6Itineraries : ProcessorItineraries<[ // possible. // // FP Special Register to Integer Register File Move - InstrItinData], [3]>, + InstrItinData], [3]>, // // Single-precision FP Unary - InstrItinData], [5, 2]>, + InstrItinData], [5, 2]>, // // Double-precision FP Unary - InstrItinData], [5, 2]>, + InstrItinData], [5, 2]>, // // Single-precision FP Compare - InstrItinData], [2, 2]>, + InstrItinData], [2, 2]>, // // Double-precision FP Compare - InstrItinData], [2, 2]>, + InstrItinData], [2, 2]>, // // Single to Double FP Convert - InstrItinData], [5, 2]>, + InstrItinData], [5, 2]>, // // Double to Single FP Convert - InstrItinData], [5, 2]>, + InstrItinData], [5, 2]>, // // Single-Precision FP to Integer Convert - InstrItinData], [9, 2]>, + InstrItinData], [9, 2]>, // // Double-Precision FP to Integer Convert - InstrItinData], [9, 2]>, + InstrItinData], [9, 2]>, // // Integer to Single-Precision FP Convert - InstrItinData], [9, 2]>, + InstrItinData], [9, 2]>, // // Integer to Double-Precision FP Convert - InstrItinData], [9, 2]>, + InstrItinData], [9, 2]>, // // Single-precision FP ALU - InstrItinData], [9, 2, 2]>, + InstrItinData], [9, 2, 2]>, // // Double-precision FP ALU - InstrItinData], [9, 2, 2]>, + InstrItinData], [9, 2, 2]>, // // Single-precision FP Multiply - InstrItinData], [9, 2, 2]>, + InstrItinData], [9, 2, 2]>, // // Double-precision FP Multiply - InstrItinData], [9, 2, 2]>, + InstrItinData], [9, 2, 2]>, // // Single-precision FP MAC - InstrItinData], [9, 2, 2, 2]>, + InstrItinData], [9, 2, 2, 2]>, // // Double-precision FP MAC - InstrItinData], [9, 2, 2, 2]>, + InstrItinData], [9, 2, 2, 2]>, // // Single-precision FP DIV - InstrItinData], [20, 2, 2]>, + InstrItinData], [20, 2, 2]>, // // Double-precision FP DIV - InstrItinData], [34, 2, 2]>, + InstrItinData], [34, 2, 2]>, // // Single-precision FP SQRT - InstrItinData], [20, 2, 2]>, + InstrItinData], [20, 2, 2]>, // // Double-precision FP SQRT - InstrItinData], [34, 2, 2]>, + InstrItinData], [34, 2, 2]>, // // Single-precision FP Load - InstrItinData], [5, 2, 2]>, + InstrItinData], [5, 2, 2]>, // // Double-precision FP Load - InstrItinData], [5, 2, 2]>, + InstrItinData], [5, 2, 2]>, // // FP Load Multiple - InstrItinData]>, + InstrItinData]>, // // Single-precision FP Store - InstrItinData], [2, 2, 2]>, + InstrItinData], [2, 2, 2]>, // // Double-precision FP Store // use FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData], [2, 2, 2]>, + InstrItinData], [2, 2, 2]>, // // FP Store Multiple - InstrItinData]> + InstrItinData]> ]>; diff --git a/lib/Target/Alpha/AlphaSchedule.td b/lib/Target/Alpha/AlphaSchedule.td index b7b45608470..4dc04b88a70 100644 --- a/lib/Target/Alpha/AlphaSchedule.td +++ b/lib/Target/Alpha/AlphaSchedule.td @@ -53,7 +53,8 @@ def s_pseudo : InstrItinClass; //Table 2­4 Instruction Class Latency in Cycles //modified some -def Alpha21264Itineraries : ProcessorItineraries<[ +def Alpha21264Itineraries : ProcessorItineraries< + [L0, L1, FST0, FST1, U0, U1, FA, FM], [ InstrItinData]>, InstrItinData]>, InstrItinData]>, diff --git a/lib/Target/CellSPU/SPUSchedule.td b/lib/Target/CellSPU/SPUSchedule.td index 785dc466011..a0b581f1632 100644 --- a/lib/Target/CellSPU/SPUSchedule.td +++ b/lib/Target/CellSPU/SPUSchedule.td @@ -36,7 +36,7 @@ def RotateShift : InstrItinClass; // EVEN_UNIT def ImmLoad : InstrItinClass; // EVEN_UNIT /* Note: The itinerary for the Cell SPU is somewhat contrived... */ -def SPUItineraries : ProcessorItineraries<[ +def SPUItineraries : ProcessorItineraries<[ODD_UNIT, EVEN_UNIT], [ InstrItinData]>, InstrItinData]>, InstrItinData]>, diff --git a/lib/Target/Mips/MipsSchedule.td b/lib/Target/Mips/MipsSchedule.td index 0c3ca57361c..616a79bf831 100644 --- a/lib/Target/Mips/MipsSchedule.td +++ b/lib/Target/Mips/MipsSchedule.td @@ -40,7 +40,7 @@ def IIPseudo : InstrItinClass; //===----------------------------------------------------------------------===// // Mips Generic instruction itineraries. //===----------------------------------------------------------------------===// -def MipsGenericItineraries : ProcessorItineraries<[ +def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [ InstrItinData]>, InstrItinData]>, InstrItinData]>, diff --git a/lib/Target/PowerPC/PPCSchedule.td b/lib/Target/PowerPC/PPCSchedule.td index d589414c015..9664f145717 100644 --- a/lib/Target/PowerPC/PPCSchedule.td +++ b/lib/Target/PowerPC/PPCSchedule.td @@ -15,8 +15,6 @@ def SLU : FuncUnit; // Store/load unit def SRU : FuncUnit; // special register unit def IU1 : FuncUnit; // integer unit 1 (simple) def IU2 : FuncUnit; // integer unit 2 (complex) -def IU3 : FuncUnit; // integer unit 3 (7450 simple) -def IU4 : FuncUnit; // integer unit 4 (7450 simple) def FPU1 : FuncUnit; // floating point unit 1 def FPU2 : FuncUnit; // floating point unit 2 def VPU : FuncUnit; // vector permutation unit @@ -24,7 +22,6 @@ def VIU1 : FuncUnit; // vector integer unit 1 (simple) def VIU2 : FuncUnit; // vector integer unit 2 (complex) def VFPU : FuncUnit; // vector floating point unit - //===----------------------------------------------------------------------===// // Instruction Itinerary classes used for PowerPC // diff --git a/lib/Target/PowerPC/PPCScheduleG3.td b/lib/Target/PowerPC/PPCScheduleG3.td index f72194d6de0..73447631b2a 100644 --- a/lib/Target/PowerPC/PPCScheduleG3.td +++ b/lib/Target/PowerPC/PPCScheduleG3.td @@ -12,7 +12,8 @@ //===----------------------------------------------------------------------===// -def G3Itineraries : ProcessorItineraries<[ +def G3Itineraries : ProcessorItineraries< + [IU1, IU2, FPU1, BPU, SRU, SLU], [ InstrItinData]>, InstrItinData]>, InstrItinData]>, diff --git a/lib/Target/PowerPC/PPCScheduleG4.td b/lib/Target/PowerPC/PPCScheduleG4.td index 92ed20f17ce..7efc693fa8c 100644 --- a/lib/Target/PowerPC/PPCScheduleG4.td +++ b/lib/Target/PowerPC/PPCScheduleG4.td @@ -11,7 +11,8 @@ // //===----------------------------------------------------------------------===// -def G4Itineraries : ProcessorItineraries<[ +def G4Itineraries : ProcessorItineraries< + [IU1, IU2, SLU, SRU, BPU, FPU1, VIU1, VIU2, VPU, VFPU], [ InstrItinData]>, InstrItinData]>, InstrItinData]>, diff --git a/lib/Target/PowerPC/PPCScheduleG4Plus.td b/lib/Target/PowerPC/PPCScheduleG4Plus.td index 7474ba494d1..15056c0cfe4 100644 --- a/lib/Target/PowerPC/PPCScheduleG4Plus.td +++ b/lib/Target/PowerPC/PPCScheduleG4Plus.td @@ -11,7 +11,11 @@ // //===----------------------------------------------------------------------===// -def G4PlusItineraries : ProcessorItineraries<[ +def IU3 : FuncUnit; // integer unit 3 (7450 simple) +def IU4 : FuncUnit; // integer unit 4 (7450 simple) + +def G4PlusItineraries : ProcessorItineraries< + [IU1, IU2, IU3, IU4, BPU, SLU, FPU1, VFPU, VIU1, VIU2, VPU], [ InstrItinData]>, InstrItinData]>, InstrItinData]>, diff --git a/lib/Target/PowerPC/PPCScheduleG5.td b/lib/Target/PowerPC/PPCScheduleG5.td index d28214715a7..2dffc48b238 100644 --- a/lib/Target/PowerPC/PPCScheduleG5.td +++ b/lib/Target/PowerPC/PPCScheduleG5.td @@ -11,7 +11,8 @@ // //===----------------------------------------------------------------------===// -def G5Itineraries : ProcessorItineraries<[ +def G5Itineraries : ProcessorItineraries< + [IU1, IU2, SLU, BPU, FPU1, FPU2, VFPU, VIU1, VIU2, VPU], [ InstrItinData]>, InstrItinData]>, InstrItinData]>, diff --git a/utils/TableGen/SubtargetEmitter.cpp b/utils/TableGen/SubtargetEmitter.cpp index fb86a704c47..b04eaf88f73 100644 --- a/utils/TableGen/SubtargetEmitter.cpp +++ b/utils/TableGen/SubtargetEmitter.cpp @@ -203,7 +203,8 @@ unsigned SubtargetEmitter::CollectAllItinClasses(raw_ostream &OS, // data initialization for the specified itinerary. N is the number // of stages. // -void SubtargetEmitter::FormItineraryStageString(Record *ItinData, +void SubtargetEmitter::FormItineraryStageString(const std::string &Name, + Record *ItinData, std::string &ItinString, unsigned &NStages) { // Get states list @@ -226,7 +227,7 @@ void SubtargetEmitter::FormItineraryStageString(Record *ItinData, // For each unit for (unsigned j = 0, M = UnitList.size(); j < M;) { // Add name and bitwise or - ItinString += UnitList[j]->getName(); + ItinString += Name + "FU::" + UnitList[j]->getName(); if (++j < M) ItinString += " | "; } @@ -279,8 +280,28 @@ void SubtargetEmitter::EmitStageAndOperandCycleData(raw_ostream &OS, // If just no itinerary then don't bother if (ProcItinList.size() < 2) return; + // Emit functional units for all the itineraries. + for (unsigned i = 0, N = ProcItinList.size(); i < N; ++i) { + // Next record + Record *Proc = ProcItinList[i]; + + std::vector FUs = Proc->getValueAsListOfDefs("FU"); + if (FUs.empty()) + continue; + + const std::string &Name = Proc->getName(); + OS << "\n// Functional units for itineraries \"" << Name << "\"\n" + << "namespace " << Name << "FU {\n"; + + for (unsigned j = 0, FUN = FUs.size(); j < FUN; ++j) + OS << " const unsigned " << FUs[j]->getName() + << " = 1 << " << j << ";\n"; + + OS << "}\n"; + } + // Begin stages table - std::string StageTable = "static const llvm::InstrStage Stages[] = {\n"; + std::string StageTable = "\nstatic const llvm::InstrStage Stages[] = {\n"; StageTable += " { 0, 0, 0, llvm::InstrStage::Required }, // No itinerary\n"; // Begin operand cycle table @@ -315,7 +336,7 @@ void SubtargetEmitter::EmitStageAndOperandCycleData(raw_ostream &OS, // Get string and stage count std::string ItinStageString; unsigned NStages; - FormItineraryStageString(ItinData, ItinStageString, NStages); + FormItineraryStageString(Name, ItinData, ItinStageString, NStages); // Get string and operand cycle count std::string ItinOperandCycleString; @@ -567,9 +588,9 @@ void SubtargetEmitter::run(raw_ostream &OS) { OS << "#include \"llvm/Support/raw_ostream.h\"\n"; OS << "#include \"llvm/Target/SubtargetFeature.h\"\n"; OS << "#include \"llvm/Target/TargetInstrItineraries.h\"\n\n"; - - Enumeration(OS, "FuncUnit", true); - OS<<"\n"; + +// Enumeration(OS, "FuncUnit", true); +// OS<<"\n"; // Enumeration(OS, "InstrItinClass", false); // OS<<"\n"; Enumeration(OS, "SubtargetFeature", true); diff --git a/utils/TableGen/SubtargetEmitter.h b/utils/TableGen/SubtargetEmitter.h index 1d7088fd390..f43a4431d61 100644 --- a/utils/TableGen/SubtargetEmitter.h +++ b/utils/TableGen/SubtargetEmitter.h @@ -34,7 +34,8 @@ class SubtargetEmitter : public TableGenBackend { void CPUKeyValues(raw_ostream &OS); unsigned CollectAllItinClasses(raw_ostream &OS, std::map &ItinClassesMap); - void FormItineraryStageString(Record *ItinData, std::string &ItinString, + void FormItineraryStageString(const std::string &Names, + Record *ItinData, std::string &ItinString, unsigned &NStages); void FormItineraryOperandCycleString(Record *ItinData, std::string &ItinString, unsigned &NOperandCycles);