Make processor FUs unique for given itinerary. This extends the limit of 32

FU per CPU arch to 32 per intinerary allowing precise modelling of quite
complex pipelines in the future.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@101754 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Anton Korobeynikov 2010-04-18 20:31:01 +00:00
parent d65077a509
commit 928eb49cae
15 changed files with 894 additions and 858 deletions

View File

@ -84,11 +84,12 @@ class InstrItinData<InstrItinClass Class, list<InstrStage> stages,
// Processor itineraries - These values represent the set of all itinerary
// classes for a given chip set.
//
class ProcessorItineraries<list<InstrItinData> iid> {
class ProcessorItineraries<list<FuncUnit> fu, list<InstrItinData> iid> {
list<FuncUnit> FU = fu;
list<InstrItinData> IID = iid;
}
// NoItineraries - A marker that can be used by processors without schedule
// info.
def NoItineraries : ProcessorItineraries<[]>;
def NoItineraries : ProcessorItineraries<[], []>;

View File

@ -7,19 +7,6 @@
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Functional units across ARM processors
//
def FU_Issue : FuncUnit; // issue
def FU_Pipe0 : FuncUnit; // pipeline 0
def FU_Pipe1 : FuncUnit; // pipeline 1
def FU_LdSt0 : FuncUnit; // pipeline 0 load/store
def FU_LdSt1 : FuncUnit; // pipeline 1 load/store
def FU_NPipe : FuncUnit; // NEON ALU/MUL pipe
def FU_NLSPipe : FuncUnit; // NEON LS pipe
def FU_DRegsVFP: FuncUnit; // FP register set, VFP side
def FU_DRegsN : FuncUnit; // FP register set, NEON side
//===----------------------------------------------------------------------===//
// Instruction Itinerary classes used for ARM
//
@ -165,8 +152,7 @@ def IIC_VTBX4 : InstrItinClass;
//===----------------------------------------------------------------------===//
// Processor instruction itineraries.
def GenericItineraries : ProcessorItineraries<[]>;
def GenericItineraries : ProcessorItineraries<[], []>;
include "ARMScheduleV6.td"
include "ARMScheduleA8.td"

View File

@ -13,156 +13,164 @@
//
// Scheduling information derived from "Cortex-A8 Technical Reference Manual".
// Functional Units.
def A8_Issue : FuncUnit; // issue
def A8_Pipe0 : FuncUnit; // pipeline 0
def A8_Pipe1 : FuncUnit; // pipeline 1
def A8_LdSt0 : FuncUnit; // pipeline 0 load/store
def A8_LdSt1 : FuncUnit; // pipeline 1 load/store
def A8_NPipe : FuncUnit; // NEON ALU/MUL pipe
def A8_NLSPipe : FuncUnit; // NEON LS pipe
//
// Dual issue pipeline represented by FU_Pipe0 | FU_Pipe1
// Dual issue pipeline represented by A8_Pipe0 | A8_Pipe1
//
def CortexA8Itineraries : ProcessorItineraries<[
def CortexA8Itineraries : ProcessorItineraries<
[A8_Issue, A8_Pipe0, A8_Pipe1, A8_LdSt0, A8_LdSt1, A8_NPipe, A8_NLSPipe], [
// Two fully-pipelined integer ALU pipelines
//
// No operand cycles
InstrItinData<IIC_iALUx , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>,
InstrItinData<IIC_iALUx , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>,
//
// Binary Instructions that produce a result
InstrItinData<IIC_iALUi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2]>,
InstrItinData<IIC_iALUr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2, 2]>,
InstrItinData<IIC_iALUsi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2, 1]>,
InstrItinData<IIC_iALUsr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2, 1, 1]>,
InstrItinData<IIC_iALUi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
InstrItinData<IIC_iALUr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>,
InstrItinData<IIC_iALUsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
InstrItinData<IIC_iALUsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>,
//
// Unary Instructions that produce a result
InstrItinData<IIC_iUNAr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2]>,
InstrItinData<IIC_iUNAsi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1]>,
InstrItinData<IIC_iUNAsr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1, 1]>,
InstrItinData<IIC_iUNAr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
InstrItinData<IIC_iUNAsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
InstrItinData<IIC_iUNAsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
//
// Compare instructions
InstrItinData<IIC_iCMPi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2]>,
InstrItinData<IIC_iCMPr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2]>,
InstrItinData<IIC_iCMPsi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1]>,
InstrItinData<IIC_iCMPsr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1, 1]>,
InstrItinData<IIC_iCMPi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
InstrItinData<IIC_iCMPr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
InstrItinData<IIC_iCMPsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
InstrItinData<IIC_iCMPsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
//
// Move instructions, unconditional
InstrItinData<IIC_iMOVi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [1]>,
InstrItinData<IIC_iMOVr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [1, 1]>,
InstrItinData<IIC_iMOVsi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [1, 1]>,
InstrItinData<IIC_iMOVsr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [1, 1, 1]>,
InstrItinData<IIC_iMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>,
InstrItinData<IIC_iMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
InstrItinData<IIC_iMOVsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
InstrItinData<IIC_iMOVsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>,
//
// Move instructions, conditional
InstrItinData<IIC_iCMOVi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2]>,
InstrItinData<IIC_iCMOVr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1]>,
InstrItinData<IIC_iCMOVsi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1]>,
InstrItinData<IIC_iCMOVsr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1, 1]>,
InstrItinData<IIC_iCMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
InstrItinData<IIC_iCMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
InstrItinData<IIC_iCMOVsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
// Integer multiply pipeline
// Result written in E5, but that is relative to the last cycle of multicycle,
// so we use 6 for those cases
//
InstrItinData<IIC_iMUL16 , [InstrStage<1, [FU_Pipe0]>], [5, 1, 1]>,
InstrItinData<IIC_iMAC16 , [InstrStage<1, [FU_Pipe1], 0>,
InstrStage<2, [FU_Pipe0]>], [6, 1, 1, 4]>,
InstrItinData<IIC_iMUL32 , [InstrStage<1, [FU_Pipe1], 0>,
InstrStage<2, [FU_Pipe0]>], [6, 1, 1]>,
InstrItinData<IIC_iMAC32 , [InstrStage<1, [FU_Pipe1], 0>,
InstrStage<2, [FU_Pipe0]>], [6, 1, 1, 4]>,
InstrItinData<IIC_iMUL64 , [InstrStage<2, [FU_Pipe1], 0>,
InstrStage<3, [FU_Pipe0]>], [6, 6, 1, 1]>,
InstrItinData<IIC_iMAC64 , [InstrStage<2, [FU_Pipe1], 0>,
InstrStage<3, [FU_Pipe0]>], [6, 6, 1, 1]>,
InstrItinData<IIC_iMUL16 , [InstrStage<1, [A8_Pipe0]>], [5, 1, 1]>,
InstrItinData<IIC_iMAC16 , [InstrStage<1, [A8_Pipe1], 0>,
InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>,
InstrItinData<IIC_iMUL32 , [InstrStage<1, [A8_Pipe1], 0>,
InstrStage<2, [A8_Pipe0]>], [6, 1, 1]>,
InstrItinData<IIC_iMAC32 , [InstrStage<1, [A8_Pipe1], 0>,
InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>,
InstrItinData<IIC_iMUL64 , [InstrStage<2, [A8_Pipe1], 0>,
InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>,
InstrItinData<IIC_iMAC64 , [InstrStage<2, [A8_Pipe1], 0>,
InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>,
// Integer load pipeline
//
// loads have an extra cycle of latency, but are fully pipelined
// use FU_Issue to enforce the 1 load/store per cycle limit
// use A8_Issue to enforce the 1 load/store per cycle limit
//
// Immediate offset
InstrItinData<IIC_iLoadi , [InstrStage<1, [FU_Issue], 0>,
InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_LdSt0]>], [3, 1]>,
InstrItinData<IIC_iLoadi , [InstrStage<1, [A8_Issue], 0>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0]>], [3, 1]>,
//
// Register offset
InstrItinData<IIC_iLoadr , [InstrStage<1, [FU_Issue], 0>,
InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_LdSt0]>], [3, 1, 1]>,
InstrItinData<IIC_iLoadr , [InstrStage<1, [A8_Issue], 0>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>,
//
// Scaled register offset, issues over 2 cycles
InstrItinData<IIC_iLoadsi , [InstrStage<2, [FU_Issue], 0>,
InstrStage<1, [FU_Pipe0], 0>,
InstrStage<1, [FU_Pipe1]>,
InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_LdSt0]>], [4, 1, 1]>,
InstrItinData<IIC_iLoadsi , [InstrStage<2, [A8_Issue], 0>,
InstrStage<1, [A8_Pipe0], 0>,
InstrStage<1, [A8_Pipe1]>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0]>], [4, 1, 1]>,
//
// Immediate offset with update
InstrItinData<IIC_iLoadiu , [InstrStage<1, [FU_Issue], 0>,
InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_LdSt0]>], [3, 2, 1]>,
InstrItinData<IIC_iLoadiu , [InstrStage<1, [A8_Issue], 0>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0]>], [3, 2, 1]>,
//
// Register offset with update
InstrItinData<IIC_iLoadru , [InstrStage<1, [FU_Issue], 0>,
InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_LdSt0]>], [3, 2, 1, 1]>,
InstrItinData<IIC_iLoadru , [InstrStage<1, [A8_Issue], 0>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0]>], [3, 2, 1, 1]>,
//
// Scaled register offset with update, issues over 2 cycles
InstrItinData<IIC_iLoadsiu , [InstrStage<2, [FU_Issue], 0>,
InstrStage<1, [FU_Pipe0], 0>,
InstrStage<1, [FU_Pipe1]>,
InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_LdSt0]>], [4, 3, 1, 1]>,
InstrItinData<IIC_iLoadsiu , [InstrStage<2, [A8_Issue], 0>,
InstrStage<1, [A8_Pipe0], 0>,
InstrStage<1, [A8_Pipe1]>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0]>], [4, 3, 1, 1]>,
//
// Load multiple
InstrItinData<IIC_iLoadm , [InstrStage<2, [FU_Issue], 0>,
InstrStage<2, [FU_Pipe0], 0>,
InstrStage<2, [FU_Pipe1]>,
InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_LdSt0]>]>,
InstrItinData<IIC_iLoadm , [InstrStage<2, [A8_Issue], 0>,
InstrStage<2, [A8_Pipe0], 0>,
InstrStage<2, [A8_Pipe1]>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0]>]>,
// Integer store pipeline
//
// use FU_Issue to enforce the 1 load/store per cycle limit
// use A8_Issue to enforce the 1 load/store per cycle limit
//
// Immediate offset
InstrItinData<IIC_iStorei , [InstrStage<1, [FU_Issue], 0>,
InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_LdSt0]>], [3, 1]>,
InstrItinData<IIC_iStorei , [InstrStage<1, [A8_Issue], 0>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0]>], [3, 1]>,
//
// Register offset
InstrItinData<IIC_iStorer , [InstrStage<1, [FU_Issue], 0>,
InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_LdSt0]>], [3, 1, 1]>,
InstrItinData<IIC_iStorer , [InstrStage<1, [A8_Issue], 0>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>,
//
// Scaled register offset, issues over 2 cycles
InstrItinData<IIC_iStoresi , [InstrStage<2, [FU_Issue], 0>,
InstrStage<1, [FU_Pipe0], 0>,
InstrStage<1, [FU_Pipe1]>,
InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_LdSt0]>], [3, 1, 1]>,
InstrItinData<IIC_iStoresi , [InstrStage<2, [A8_Issue], 0>,
InstrStage<1, [A8_Pipe0], 0>,
InstrStage<1, [A8_Pipe1]>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>,
//
// Immediate offset with update
InstrItinData<IIC_iStoreiu , [InstrStage<1, [FU_Issue], 0>,
InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_LdSt0]>], [2, 3, 1]>,
InstrItinData<IIC_iStoreiu , [InstrStage<1, [A8_Issue], 0>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0]>], [2, 3, 1]>,
//
// Register offset with update
InstrItinData<IIC_iStoreru , [InstrStage<1, [FU_Issue], 0>,
InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_LdSt0]>], [2, 3, 1, 1]>,
InstrItinData<IIC_iStoreru , [InstrStage<1, [A8_Issue], 0>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0]>], [2, 3, 1, 1]>,
//
// Scaled register offset with update, issues over 2 cycles
InstrItinData<IIC_iStoresiu, [InstrStage<2, [FU_Issue], 0>,
InstrStage<1, [FU_Pipe0], 0>,
InstrStage<1, [FU_Pipe1]>,
InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_LdSt0]>], [3, 3, 1, 1]>,
InstrItinData<IIC_iStoresiu, [InstrStage<2, [A8_Issue], 0>,
InstrStage<1, [A8_Pipe0], 0>,
InstrStage<1, [A8_Pipe1]>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0]>], [3, 3, 1, 1]>,
//
// Store multiple
InstrItinData<IIC_iStorem , [InstrStage<2, [FU_Issue], 0>,
InstrStage<2, [FU_Pipe0], 0>,
InstrStage<2, [FU_Pipe1]>,
InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_LdSt0]>]>,
InstrItinData<IIC_iStorem , [InstrStage<2, [A8_Issue], 0>,
InstrStage<2, [A8_Pipe0], 0>,
InstrStage<2, [A8_Pipe1]>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0]>]>,
// Branch
//
// no delay slots, so the latency of a branch is unimportant
InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>,
InstrItinData<IIC_Br , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>,
// VFP
// Issue through integer pipeline, and execute in NEON unit. We assume
@ -170,441 +178,441 @@ def CortexA8Itineraries : ProcessorItineraries<[
// possible.
//
// FP Special Register to Integer Register File Move
InstrItinData<IIC_fpSTAT , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_NLSPipe]>]>,
InstrItinData<IIC_fpSTAT , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NLSPipe]>]>,
//
// Single-precision FP Unary
InstrItinData<IIC_fpUNA32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_NPipe]>], [7, 1]>,
InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NPipe]>], [7, 1]>,
//
// Double-precision FP Unary
InstrItinData<IIC_fpUNA64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<4, [FU_NPipe], 0>,
InstrStage<4, [FU_NLSPipe]>], [4, 1]>,
InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<4, [A8_NPipe], 0>,
InstrStage<4, [A8_NLSPipe]>], [4, 1]>,
//
// Single-precision FP Compare
InstrItinData<IIC_fpCMP32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_NPipe]>], [1, 1]>,
InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NPipe]>], [1, 1]>,
//
// Double-precision FP Compare
InstrItinData<IIC_fpCMP64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<4, [FU_NPipe], 0>,
InstrStage<4, [FU_NLSPipe]>], [4, 1]>,
InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<4, [A8_NPipe], 0>,
InstrStage<4, [A8_NLSPipe]>], [4, 1]>,
//
// Single to Double FP Convert
InstrItinData<IIC_fpCVTSD , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<7, [FU_NPipe], 0>,
InstrStage<7, [FU_NLSPipe]>], [7, 1]>,
InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<7, [A8_NPipe], 0>,
InstrStage<7, [A8_NLSPipe]>], [7, 1]>,
//
// Double to Single FP Convert
InstrItinData<IIC_fpCVTDS , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<5, [FU_NPipe], 0>,
InstrStage<5, [FU_NLSPipe]>], [5, 1]>,
InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<5, [A8_NPipe], 0>,
InstrStage<5, [A8_NLSPipe]>], [5, 1]>,
//
// Single-Precision FP to Integer Convert
InstrItinData<IIC_fpCVTSI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_NPipe]>], [7, 1]>,
InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NPipe]>], [7, 1]>,
//
// Double-Precision FP to Integer Convert
InstrItinData<IIC_fpCVTDI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<8, [FU_NPipe], 0>,
InstrStage<8, [FU_NLSPipe]>], [8, 1]>,
InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<8, [A8_NPipe], 0>,
InstrStage<8, [A8_NLSPipe]>], [8, 1]>,
//
// Integer to Single-Precision FP Convert
InstrItinData<IIC_fpCVTIS , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_NPipe]>], [7, 1]>,
InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NPipe]>], [7, 1]>,
//
// Integer to Double-Precision FP Convert
InstrItinData<IIC_fpCVTID , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<8, [FU_NPipe], 0>,
InstrStage<8, [FU_NLSPipe]>], [8, 1]>,
InstrItinData<IIC_fpCVTID , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<8, [A8_NPipe], 0>,
InstrStage<8, [A8_NLSPipe]>], [8, 1]>,
//
// Single-precision FP ALU
InstrItinData<IIC_fpALU32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_NPipe]>], [7, 1, 1]>,
InstrItinData<IIC_fpALU32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NPipe]>], [7, 1, 1]>,
//
// Double-precision FP ALU
InstrItinData<IIC_fpALU64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<9, [FU_NPipe], 0>,
InstrStage<9, [FU_NLSPipe]>], [9, 1, 1]>,
InstrItinData<IIC_fpALU64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<9, [A8_NPipe], 0>,
InstrStage<9, [A8_NLSPipe]>], [9, 1, 1]>,
//
// Single-precision FP Multiply
InstrItinData<IIC_fpMUL32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_NPipe]>], [7, 1, 1]>,
InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NPipe]>], [7, 1, 1]>,
//
// Double-precision FP Multiply
InstrItinData<IIC_fpMUL64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<11, [FU_NPipe], 0>,
InstrStage<11, [FU_NLSPipe]>], [11, 1, 1]>,
InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<11, [A8_NPipe], 0>,
InstrStage<11, [A8_NLSPipe]>], [11, 1, 1]>,
//
// Single-precision FP MAC
InstrItinData<IIC_fpMAC32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_NPipe]>], [7, 2, 1, 1]>,
InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NPipe]>], [7, 2, 1, 1]>,
//
// Double-precision FP MAC
InstrItinData<IIC_fpMAC64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<19, [FU_NPipe], 0>,
InstrStage<19, [FU_NLSPipe]>], [19, 2, 1, 1]>,
InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<19, [A8_NPipe], 0>,
InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>,
//
// Single-precision FP DIV
InstrItinData<IIC_fpDIV32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<20, [FU_NPipe], 0>,
InstrStage<20, [FU_NLSPipe]>], [20, 1, 1]>,
InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<20, [A8_NPipe], 0>,
InstrStage<20, [A8_NLSPipe]>], [20, 1, 1]>,
//
// Double-precision FP DIV
InstrItinData<IIC_fpDIV64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<29, [FU_NPipe], 0>,
InstrStage<29, [FU_NLSPipe]>], [29, 1, 1]>,
InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<29, [A8_NPipe], 0>,
InstrStage<29, [A8_NLSPipe]>], [29, 1, 1]>,
//
// Single-precision FP SQRT
InstrItinData<IIC_fpSQRT32, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<19, [FU_NPipe], 0>,
InstrStage<19, [FU_NLSPipe]>], [19, 1]>,
InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<19, [A8_NPipe], 0>,
InstrStage<19, [A8_NLSPipe]>], [19, 1]>,
//
// Double-precision FP SQRT
InstrItinData<IIC_fpSQRT64, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<29, [FU_NPipe], 0>,
InstrStage<29, [FU_NLSPipe]>], [29, 1]>,
InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<29, [A8_NPipe], 0>,
InstrStage<29, [A8_NLSPipe]>], [29, 1]>,
//
// Single-precision FP Load
// use FU_Issue to enforce the 1 load/store per cycle limit
InstrItinData<IIC_fpLoad32, [InstrStage<1, [FU_Issue], 0>,
InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_LdSt0], 0>,
InstrStage<1, [FU_NLSPipe]>]>,
// use A8_Issue to enforce the 1 load/store per cycle limit
InstrItinData<IIC_fpLoad32, [InstrStage<1, [A8_Issue], 0>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0], 0>,
InstrStage<1, [A8_NLSPipe]>]>,
//
// Double-precision FP Load
// use FU_Issue to enforce the 1 load/store per cycle limit
InstrItinData<IIC_fpLoad64, [InstrStage<2, [FU_Issue], 0>,
InstrStage<1, [FU_Pipe0], 0>,
InstrStage<1, [FU_Pipe1]>,
InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_LdSt0], 0>,
InstrStage<1, [FU_NLSPipe]>]>,
// use A8_Issue to enforce the 1 load/store per cycle limit
InstrItinData<IIC_fpLoad64, [InstrStage<2, [A8_Issue], 0>,
InstrStage<1, [A8_Pipe0], 0>,
InstrStage<1, [A8_Pipe1]>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0], 0>,
InstrStage<1, [A8_NLSPipe]>]>,
//
// FP Load Multiple
// use FU_Issue to enforce the 1 load/store per cycle limit
InstrItinData<IIC_fpLoadm, [InstrStage<3, [FU_Issue], 0>,
InstrStage<2, [FU_Pipe0], 0>,
InstrStage<2, [FU_Pipe1]>,
InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_LdSt0], 0>,
InstrStage<1, [FU_NLSPipe]>]>,
// use A8_Issue to enforce the 1 load/store per cycle limit
InstrItinData<IIC_fpLoadm, [InstrStage<3, [A8_Issue], 0>,
InstrStage<2, [A8_Pipe0], 0>,
InstrStage<2, [A8_Pipe1]>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0], 0>,
InstrStage<1, [A8_NLSPipe]>]>,
//
// Single-precision FP Store
// use FU_Issue to enforce the 1 load/store per cycle limit
InstrItinData<IIC_fpStore32,[InstrStage<1, [FU_Issue], 0>,
InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_LdSt0], 0>,
InstrStage<1, [FU_NLSPipe]>]>,
// use A8_Issue to enforce the 1 load/store per cycle limit
InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Issue], 0>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0], 0>,
InstrStage<1, [A8_NLSPipe]>]>,
//
// Double-precision FP Store
// use FU_Issue to enforce the 1 load/store per cycle limit
InstrItinData<IIC_fpStore64,[InstrStage<2, [FU_Issue], 0>,
InstrStage<1, [FU_Pipe0], 0>,
InstrStage<1, [FU_Pipe1]>,
InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_LdSt0], 0>,
InstrStage<1, [FU_NLSPipe]>]>,
// use A8_Issue to enforce the 1 load/store per cycle limit
InstrItinData<IIC_fpStore64,[InstrStage<2, [A8_Issue], 0>,
InstrStage<1, [A8_Pipe0], 0>,
InstrStage<1, [A8_Pipe1]>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0], 0>,
InstrStage<1, [A8_NLSPipe]>]>,
//
// FP Store Multiple
// use FU_Issue to enforce the 1 load/store per cycle limit
InstrItinData<IIC_fpStorem, [InstrStage<3, [FU_Issue], 0>,
InstrStage<2, [FU_Pipe0], 0>,
InstrStage<2, [FU_Pipe1]>,
InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_LdSt0], 0>,
InstrStage<1, [FU_NLSPipe]>]>,
// use A8_Issue to enforce the 1 load/store per cycle limit
InstrItinData<IIC_fpStorem, [InstrStage<3, [A8_Issue], 0>,
InstrStage<2, [A8_Pipe0], 0>,
InstrStage<2, [A8_Pipe1]>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0], 0>,
InstrStage<1, [A8_NLSPipe]>]>,
// NEON
// Issue through integer pipeline, and execute in NEON unit.
//
// VLD1
// FIXME: We don't model this instruction properly
InstrItinData<IIC_VLD1, [InstrStage<1, [FU_Issue], 0>,
InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_LdSt0], 0>,
InstrStage<1, [FU_NLSPipe]>]>,
InstrItinData<IIC_VLD1, [InstrStage<1, [A8_Issue], 0>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0], 0>,
InstrStage<1, [A8_NLSPipe]>]>,
//
// VLD2
// FIXME: We don't model this instruction properly
InstrItinData<IIC_VLD2, [InstrStage<1, [FU_Issue], 0>,
InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_LdSt0], 0>,
InstrStage<1, [FU_NLSPipe]>], [2, 2, 1]>,
InstrItinData<IIC_VLD2, [InstrStage<1, [A8_Issue], 0>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0], 0>,
InstrStage<1, [A8_NLSPipe]>], [2, 2, 1]>,
//
// VLD3
// FIXME: We don't model this instruction properly
InstrItinData<IIC_VLD3, [InstrStage<1, [FU_Issue], 0>,
InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_LdSt0], 0>,
InstrStage<1, [FU_NLSPipe]>], [2, 2, 2, 1]>,
InstrItinData<IIC_VLD3, [InstrStage<1, [A8_Issue], 0>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0], 0>,
InstrStage<1, [A8_NLSPipe]>], [2, 2, 2, 1]>,
//
// VLD4
// FIXME: We don't model this instruction properly
InstrItinData<IIC_VLD4, [InstrStage<1, [FU_Issue], 0>,
InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_LdSt0], 0>,
InstrStage<1, [FU_NLSPipe]>], [2, 2, 2, 2, 1]>,
InstrItinData<IIC_VLD4, [InstrStage<1, [A8_Issue], 0>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0], 0>,
InstrStage<1, [A8_NLSPipe]>], [2, 2, 2, 2, 1]>,
//
// VST
// FIXME: We don't model this instruction properly
InstrItinData<IIC_VST, [InstrStage<1, [FU_Issue], 0>,
InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_LdSt0], 0>,
InstrStage<1, [FU_NLSPipe]>]>,
InstrItinData<IIC_VST, [InstrStage<1, [A8_Issue], 0>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0], 0>,
InstrStage<1, [A8_NLSPipe]>]>,
//
// Double-register FP Unary
InstrItinData<IIC_VUNAD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_NPipe]>], [5, 2]>,
InstrItinData<IIC_VUNAD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NPipe]>], [5, 2]>,
//
// Quad-register FP Unary
// Result written in N5, but that is relative to the last cycle of multicycle,
// so we use 6 for those cases
InstrItinData<IIC_VUNAQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<2, [FU_NPipe]>], [6, 2]>,
InstrItinData<IIC_VUNAQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<2, [A8_NPipe]>], [6, 2]>,
//
// Double-register FP Binary
InstrItinData<IIC_VBIND, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_NPipe]>], [5, 2, 2]>,
InstrItinData<IIC_VBIND, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NPipe]>], [5, 2, 2]>,
//
// Quad-register FP Binary
// Result written in N5, but that is relative to the last cycle of multicycle,
// so we use 6 for those cases
InstrItinData<IIC_VBINQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<2, [FU_NPipe]>], [6, 2, 2]>,
InstrItinData<IIC_VBINQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<2, [A8_NPipe]>], [6, 2, 2]>,
//
// Move Immediate
InstrItinData<IIC_VMOVImm, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_NPipe]>], [3]>,
InstrItinData<IIC_VMOVImm, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NPipe]>], [3]>,
//
// Double-register Permute Move
InstrItinData<IIC_VMOVD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_NLSPipe]>], [2, 1]>,
InstrItinData<IIC_VMOVD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NLSPipe]>], [2, 1]>,
//
// Quad-register Permute Move
// Result written in N2, but that is relative to the last cycle of multicycle,
// so we use 3 for those cases
InstrItinData<IIC_VMOVQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<2, [FU_NLSPipe]>], [3, 1]>,
InstrItinData<IIC_VMOVQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<2, [A8_NLSPipe]>], [3, 1]>,
//
// Integer to Single-precision Move
InstrItinData<IIC_VMOVIS , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_NLSPipe]>], [2, 1]>,
InstrItinData<IIC_VMOVIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NLSPipe]>], [2, 1]>,
//
// Integer to Double-precision Move
InstrItinData<IIC_VMOVID , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_NLSPipe]>], [2, 1, 1]>,
InstrItinData<IIC_VMOVID , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>,
//
// Single-precision to Integer Move
InstrItinData<IIC_VMOVSI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_NLSPipe]>], [20, 1]>,
InstrItinData<IIC_VMOVSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NLSPipe]>], [20, 1]>,
//
// Double-precision to Integer Move
InstrItinData<IIC_VMOVDI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_NLSPipe]>], [20, 20, 1]>,
InstrItinData<IIC_VMOVDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NLSPipe]>], [20, 20, 1]>,
//
// Integer to Lane Move
InstrItinData<IIC_VMOVISL , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<2, [FU_NLSPipe]>], [3, 1, 1]>,
InstrItinData<IIC_VMOVISL , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>,
//
// Double-register Permute
InstrItinData<IIC_VPERMD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_NLSPipe]>], [2, 2, 1, 1]>,
InstrItinData<IIC_VPERMD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NLSPipe]>], [2, 2, 1, 1]>,
//
// Quad-register Permute
// Result written in N2, but that is relative to the last cycle of multicycle,
// so we use 3 for those cases
InstrItinData<IIC_VPERMQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<2, [FU_NLSPipe]>], [3, 3, 1, 1]>,
InstrItinData<IIC_VPERMQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<2, [A8_NLSPipe]>], [3, 3, 1, 1]>,
//
// Quad-register Permute (3 cycle issue)
// Result written in N2, but that is relative to the last cycle of multicycle,
// so we use 4 for those cases
InstrItinData<IIC_VPERMQ3, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_NLSPipe]>,
InstrStage<1, [FU_NPipe], 0>,
InstrStage<2, [FU_NLSPipe]>], [4, 4, 1, 1]>,
InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NLSPipe]>,
InstrStage<1, [A8_NPipe], 0>,
InstrStage<2, [A8_NLSPipe]>], [4, 4, 1, 1]>,
//
// Double-register FP Multiple-Accumulate
InstrItinData<IIC_VMACD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_NPipe]>], [9, 3, 2, 2]>,
InstrItinData<IIC_VMACD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NPipe]>], [9, 3, 2, 2]>,
//
// Quad-register FP Multiple-Accumulate
// Result written in N9, but that is relative to the last cycle of multicycle,
// so we use 10 for those cases
InstrItinData<IIC_VMACQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<2, [FU_NPipe]>], [10, 3, 2, 2]>,
InstrItinData<IIC_VMACQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>,
//
// Double-register Reciprical Step
InstrItinData<IIC_VRECSD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_NPipe]>], [9, 2, 2]>,
InstrItinData<IIC_VRECSD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NPipe]>], [9, 2, 2]>,
//
// Quad-register Reciprical Step
InstrItinData<IIC_VRECSQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<2, [FU_NPipe]>], [10, 2, 2]>,
InstrItinData<IIC_VRECSQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<2, [A8_NPipe]>], [10, 2, 2]>,
//
// Double-register Integer Count
InstrItinData<IIC_VCNTiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_NPipe]>], [3, 2, 2]>,
InstrItinData<IIC_VCNTiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
//
// Quad-register Integer Count
// Result written in N3, but that is relative to the last cycle of multicycle,
// so we use 4 for those cases
InstrItinData<IIC_VCNTiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<2, [FU_NPipe]>], [4, 2, 2]>,
InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<2, [A8_NPipe]>], [4, 2, 2]>,
//
// Double-register Integer Unary
InstrItinData<IIC_VUNAiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_NPipe]>], [4, 2]>,
InstrItinData<IIC_VUNAiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NPipe]>], [4, 2]>,
//
// Quad-register Integer Unary
InstrItinData<IIC_VUNAiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_NPipe]>], [4, 2]>,
InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NPipe]>], [4, 2]>,
//
// Double-register Integer Q-Unary
InstrItinData<IIC_VQUNAiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_NPipe]>], [4, 1]>,
InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NPipe]>], [4, 1]>,
//
// Quad-register Integer CountQ-Unary
InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_NPipe]>], [4, 1]>,
InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NPipe]>], [4, 1]>,
//
// Double-register Integer Binary
InstrItinData<IIC_VBINiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_NPipe]>], [3, 2, 2]>,
InstrItinData<IIC_VBINiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
//
// Quad-register Integer Binary
InstrItinData<IIC_VBINiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_NPipe]>], [3, 2, 2]>,
InstrItinData<IIC_VBINiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
//
// Double-register Integer Binary (4 cycle)
InstrItinData<IIC_VBINi4D, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_NPipe]>], [4, 2, 1]>,
InstrItinData<IIC_VBINi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
//
// Quad-register Integer Binary (4 cycle)
InstrItinData<IIC_VBINi4Q, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_NPipe]>], [4, 2, 1]>,
InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
//
// Double-register Integer Subtract
InstrItinData<IIC_VSUBiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_NPipe]>], [3, 2, 1]>,
InstrItinData<IIC_VSUBiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NPipe]>], [3, 2, 1]>,
//
// Quad-register Integer Subtract
InstrItinData<IIC_VSUBiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_NPipe]>], [3, 2, 1]>,
InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NPipe]>], [3, 2, 1]>,
//
// Double-register Integer Subtract
InstrItinData<IIC_VSUBi4D, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_NPipe]>], [4, 2, 1]>,
InstrItinData<IIC_VSUBi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
//
// Quad-register Integer Subtract
InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_NPipe]>], [4, 2, 1]>,
InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
//
// Double-register Integer Shift
InstrItinData<IIC_VSHLiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_NPipe]>], [3, 1, 1]>,
InstrItinData<IIC_VSHLiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NPipe]>], [3, 1, 1]>,
//
// Quad-register Integer Shift
InstrItinData<IIC_VSHLiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<2, [FU_NPipe]>], [4, 1, 1]>,
InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<2, [A8_NPipe]>], [4, 1, 1]>,
//
// Double-register Integer Shift (4 cycle)
InstrItinData<IIC_VSHLi4D, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_NPipe]>], [4, 1, 1]>,
InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NPipe]>], [4, 1, 1]>,
//
// Quad-register Integer Shift (4 cycle)
InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<2, [FU_NPipe]>], [5, 1, 1]>,
InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<2, [A8_NPipe]>], [5, 1, 1]>,
//
// Double-register Integer Pair Add Long
InstrItinData<IIC_VPALiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_NPipe]>], [6, 3, 1]>,
InstrItinData<IIC_VPALiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NPipe]>], [6, 3, 1]>,
//
// Quad-register Integer Pair Add Long
InstrItinData<IIC_VPALiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<2, [FU_NPipe]>], [7, 3, 1]>,
InstrItinData<IIC_VPALiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<2, [A8_NPipe]>], [7, 3, 1]>,
//
// Double-register Absolute Difference and Accumulate
InstrItinData<IIC_VABAD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_NPipe]>], [6, 3, 2, 1]>,
InstrItinData<IIC_VABAD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NPipe]>], [6, 3, 2, 1]>,
//
// Quad-register Absolute Difference and Accumulate
InstrItinData<IIC_VABAQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<2, [FU_NPipe]>], [6, 3, 2, 1]>,
InstrItinData<IIC_VABAQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<2, [A8_NPipe]>], [6, 3, 2, 1]>,
//
// Double-register Integer Multiply (.8, .16)
InstrItinData<IIC_VMULi16D, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_NPipe]>], [6, 2, 2]>,
InstrItinData<IIC_VMULi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NPipe]>], [6, 2, 2]>,
//
// Double-register Integer Multiply (.32)
InstrItinData<IIC_VMULi32D, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<2, [FU_NPipe]>], [7, 2, 1]>,
InstrItinData<IIC_VMULi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<2, [A8_NPipe]>], [7, 2, 1]>,
//
// Quad-register Integer Multiply (.8, .16)
InstrItinData<IIC_VMULi16Q, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<2, [FU_NPipe]>], [7, 2, 2]>,
InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<2, [A8_NPipe]>], [7, 2, 2]>,
//
// Quad-register Integer Multiply (.32)
InstrItinData<IIC_VMULi32Q, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_NPipe]>,
InstrStage<2, [FU_NLSPipe], 0>,
InstrStage<3, [FU_NPipe]>], [9, 2, 1]>,
InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NPipe]>,
InstrStage<2, [A8_NLSPipe], 0>,
InstrStage<3, [A8_NPipe]>], [9, 2, 1]>,
//
// Double-register Integer Multiply-Accumulate (.8, .16)
InstrItinData<IIC_VMACi16D, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_NPipe]>], [6, 3, 2, 2]>,
InstrItinData<IIC_VMACi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NPipe]>], [6, 3, 2, 2]>,
//
// Double-register Integer Multiply-Accumulate (.32)
InstrItinData<IIC_VMACi32D, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<2, [FU_NPipe]>], [7, 3, 2, 1]>,
InstrItinData<IIC_VMACi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<2, [A8_NPipe]>], [7, 3, 2, 1]>,
//
// Quad-register Integer Multiply-Accumulate (.8, .16)
InstrItinData<IIC_VMACi16Q, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<2, [FU_NPipe]>], [7, 3, 2, 2]>,
InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<2, [A8_NPipe]>], [7, 3, 2, 2]>,
//
// Quad-register Integer Multiply-Accumulate (.32)
InstrItinData<IIC_VMACi32Q, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_NPipe]>,
InstrStage<2, [FU_NLSPipe], 0>,
InstrStage<3, [FU_NPipe]>], [9, 3, 2, 1]>,
InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NPipe]>,
InstrStage<2, [A8_NLSPipe], 0>,
InstrStage<3, [A8_NPipe]>], [9, 3, 2, 1]>,
//
// Double-register VEXT
InstrItinData<IIC_VEXTD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_NLSPipe]>], [2, 1, 1]>,
InstrItinData<IIC_VEXTD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>,
//
// Quad-register VEXT
InstrItinData<IIC_VEXTQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<2, [FU_NLSPipe]>], [3, 1, 1]>,
InstrItinData<IIC_VEXTQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>,
//
// VTB
InstrItinData<IIC_VTB1, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<2, [FU_NLSPipe]>], [3, 2, 1]>,
InstrItinData<IIC_VTB2, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<2, [FU_NLSPipe]>], [3, 2, 2, 1]>,
InstrItinData<IIC_VTB3, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_NLSPipe]>,
InstrStage<1, [FU_NPipe], 0>,
InstrStage<2, [FU_NLSPipe]>], [4, 2, 2, 3, 1]>,
InstrItinData<IIC_VTB4, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_NLSPipe]>,
InstrStage<1, [FU_NPipe], 0>,
InstrStage<2, [FU_NLSPipe]>], [4, 2, 2, 3, 3, 1]>,
InstrItinData<IIC_VTB1, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<2, [A8_NLSPipe]>], [3, 2, 1]>,
InstrItinData<IIC_VTB2, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<2, [A8_NLSPipe]>], [3, 2, 2, 1]>,
InstrItinData<IIC_VTB3, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NLSPipe]>,
InstrStage<1, [A8_NPipe], 0>,
InstrStage<2, [A8_NLSPipe]>], [4, 2, 2, 3, 1]>,
InstrItinData<IIC_VTB4, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NLSPipe]>,
InstrStage<1, [A8_NPipe], 0>,
InstrStage<2, [A8_NLSPipe]>], [4, 2, 2, 3, 3, 1]>,
//
// VTBX
InstrItinData<IIC_VTBX1, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<2, [FU_NLSPipe]>], [3, 1, 2, 1]>,
InstrItinData<IIC_VTBX2, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<2, [FU_NLSPipe]>], [3, 1, 2, 2, 1]>,
InstrItinData<IIC_VTBX3, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_NLSPipe]>,
InstrStage<1, [FU_NPipe], 0>,
InstrStage<2, [FU_NLSPipe]>], [4, 1, 2, 2, 3, 1]>,
InstrItinData<IIC_VTBX4, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_NLSPipe]>,
InstrStage<1, [FU_NPipe], 0>,
InstrStage<2, [FU_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]>
InstrItinData<IIC_VTBX1, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 1]>,
InstrItinData<IIC_VTBX2, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 2, 1]>,
InstrItinData<IIC_VTBX3, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NLSPipe]>,
InstrStage<1, [A8_NPipe], 0>,
InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 1]>,
InstrItinData<IIC_VTBX4, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NLSPipe]>,
InstrStage<1, [A8_NPipe], 0>,
InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]>
]>;

File diff suppressed because it is too large Load Diff

View File

@ -13,103 +13,107 @@
// Model based on ARM1176
//
// Functional Units
def V6_Pipe : FuncUnit; // pipeline
// Scheduling information derived from "ARM1176JZF-S Technical Reference Manual".
//
def ARMV6Itineraries : ProcessorItineraries<[
def ARMV6Itineraries : ProcessorItineraries<
[V6_Pipe], [
//
// No operand cycles
InstrItinData<IIC_iALUx , [InstrStage<1, [FU_Pipe0]>]>,
InstrItinData<IIC_iALUx , [InstrStage<1, [V6_Pipe]>]>,
//
// Binary Instructions that produce a result
InstrItinData<IIC_iALUi , [InstrStage<1, [FU_Pipe0]>], [2, 2]>,
InstrItinData<IIC_iALUr , [InstrStage<1, [FU_Pipe0]>], [2, 2, 2]>,
InstrItinData<IIC_iALUsi , [InstrStage<1, [FU_Pipe0]>], [2, 2, 1]>,
InstrItinData<IIC_iALUsr , [InstrStage<2, [FU_Pipe0]>], [3, 3, 2, 1]>,
InstrItinData<IIC_iALUi , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
InstrItinData<IIC_iALUr , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>,
InstrItinData<IIC_iALUsi , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
InstrItinData<IIC_iALUsr , [InstrStage<2, [V6_Pipe]>], [3, 3, 2, 1]>,
//
// Unary Instructions that produce a result
InstrItinData<IIC_iUNAr , [InstrStage<1, [FU_Pipe0]>], [2, 2]>,
InstrItinData<IIC_iUNAsi , [InstrStage<1, [FU_Pipe0]>], [2, 1]>,
InstrItinData<IIC_iUNAsr , [InstrStage<2, [FU_Pipe0]>], [3, 2, 1]>,
InstrItinData<IIC_iUNAr , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
InstrItinData<IIC_iUNAsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
InstrItinData<IIC_iUNAsr , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>,
//
// Compare instructions
InstrItinData<IIC_iCMPi , [InstrStage<1, [FU_Pipe0]>], [2]>,
InstrItinData<IIC_iCMPr , [InstrStage<1, [FU_Pipe0]>], [2, 2]>,
InstrItinData<IIC_iCMPsi , [InstrStage<1, [FU_Pipe0]>], [2, 1]>,
InstrItinData<IIC_iCMPsr , [InstrStage<2, [FU_Pipe0]>], [3, 2, 1]>,
InstrItinData<IIC_iCMPi , [InstrStage<1, [V6_Pipe]>], [2]>,
InstrItinData<IIC_iCMPr , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
InstrItinData<IIC_iCMPsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
InstrItinData<IIC_iCMPsr , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>,
//
// Move instructions, unconditional
InstrItinData<IIC_iMOVi , [InstrStage<1, [FU_Pipe0]>], [2]>,
InstrItinData<IIC_iMOVr , [InstrStage<1, [FU_Pipe0]>], [2, 2]>,
InstrItinData<IIC_iMOVsi , [InstrStage<1, [FU_Pipe0]>], [2, 1]>,
InstrItinData<IIC_iMOVsr , [InstrStage<2, [FU_Pipe0]>], [3, 2, 1]>,
InstrItinData<IIC_iMOVi , [InstrStage<1, [V6_Pipe]>], [2]>,
InstrItinData<IIC_iMOVr , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
InstrItinData<IIC_iMOVsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
InstrItinData<IIC_iMOVsr , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>,
//
// Move instructions, conditional
InstrItinData<IIC_iCMOVi , [InstrStage<1, [FU_Pipe0]>], [3]>,
InstrItinData<IIC_iCMOVr , [InstrStage<1, [FU_Pipe0]>], [3, 2]>,
InstrItinData<IIC_iCMOVsi , [InstrStage<1, [FU_Pipe0]>], [3, 1]>,
InstrItinData<IIC_iCMOVsr , [InstrStage<1, [FU_Pipe0]>], [4, 2, 1]>,
InstrItinData<IIC_iCMOVi , [InstrStage<1, [V6_Pipe]>], [3]>,
InstrItinData<IIC_iCMOVr , [InstrStage<1, [V6_Pipe]>], [3, 2]>,
InstrItinData<IIC_iCMOVsi , [InstrStage<1, [V6_Pipe]>], [3, 1]>,
InstrItinData<IIC_iCMOVsr , [InstrStage<1, [V6_Pipe]>], [4, 2, 1]>,
// Integer multiply pipeline
//
InstrItinData<IIC_iMUL16 , [InstrStage<1, [FU_Pipe0]>], [4, 1, 1]>,
InstrItinData<IIC_iMAC16 , [InstrStage<1, [FU_Pipe0]>], [4, 1, 1, 2]>,
InstrItinData<IIC_iMUL32 , [InstrStage<2, [FU_Pipe0]>], [5, 1, 1]>,
InstrItinData<IIC_iMAC32 , [InstrStage<2, [FU_Pipe0]>], [5, 1, 1, 2]>,
InstrItinData<IIC_iMUL64 , [InstrStage<3, [FU_Pipe0]>], [6, 1, 1]>,
InstrItinData<IIC_iMAC64 , [InstrStage<3, [FU_Pipe0]>], [6, 1, 1, 2]>,
InstrItinData<IIC_iMUL16 , [InstrStage<1, [V6_Pipe]>], [4, 1, 1]>,
InstrItinData<IIC_iMAC16 , [InstrStage<1, [V6_Pipe]>], [4, 1, 1, 2]>,
InstrItinData<IIC_iMUL32 , [InstrStage<2, [V6_Pipe]>], [5, 1, 1]>,
InstrItinData<IIC_iMAC32 , [InstrStage<2, [V6_Pipe]>], [5, 1, 1, 2]>,
InstrItinData<IIC_iMUL64 , [InstrStage<3, [V6_Pipe]>], [6, 1, 1]>,
InstrItinData<IIC_iMAC64 , [InstrStage<3, [V6_Pipe]>], [6, 1, 1, 2]>,
// Integer load pipeline
//
// Immediate offset
InstrItinData<IIC_iLoadi , [InstrStage<1, [FU_Pipe0]>], [4, 1]>,
InstrItinData<IIC_iLoadi , [InstrStage<1, [V6_Pipe]>], [4, 1]>,
//
// Register offset
InstrItinData<IIC_iLoadr , [InstrStage<1, [FU_Pipe0]>], [4, 1, 1]>,
InstrItinData<IIC_iLoadr , [InstrStage<1, [V6_Pipe]>], [4, 1, 1]>,
//
// Scaled register offset, issues over 2 cycles
InstrItinData<IIC_iLoadsi , [InstrStage<2, [FU_Pipe0]>], [5, 2, 1]>,
InstrItinData<IIC_iLoadsi , [InstrStage<2, [V6_Pipe]>], [5, 2, 1]>,
//
// Immediate offset with update
InstrItinData<IIC_iLoadiu , [InstrStage<1, [FU_Pipe0]>], [4, 2, 1]>,
InstrItinData<IIC_iLoadiu , [InstrStage<1, [V6_Pipe]>], [4, 2, 1]>,
//
// Register offset with update
InstrItinData<IIC_iLoadru , [InstrStage<1, [FU_Pipe0]>], [4, 2, 1, 1]>,
InstrItinData<IIC_iLoadru , [InstrStage<1, [V6_Pipe]>], [4, 2, 1, 1]>,
//
// Scaled register offset with update, issues over 2 cycles
InstrItinData<IIC_iLoadsiu , [InstrStage<2, [FU_Pipe0]>], [5, 2, 2, 1]>,
InstrItinData<IIC_iLoadsiu , [InstrStage<2, [V6_Pipe]>], [5, 2, 2, 1]>,
//
// Load multiple
InstrItinData<IIC_iLoadm , [InstrStage<3, [FU_Pipe0]>]>,
InstrItinData<IIC_iLoadm , [InstrStage<3, [V6_Pipe]>]>,
// Integer store pipeline
//
// Immediate offset
InstrItinData<IIC_iStorei , [InstrStage<1, [FU_Pipe0]>], [2, 1]>,
InstrItinData<IIC_iStorei , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
//
// Register offset
InstrItinData<IIC_iStorer , [InstrStage<1, [FU_Pipe0]>], [2, 1, 1]>,
InstrItinData<IIC_iStorer , [InstrStage<1, [V6_Pipe]>], [2, 1, 1]>,
//
// Scaled register offset, issues over 2 cycles
InstrItinData<IIC_iStoresi , [InstrStage<2, [FU_Pipe0]>], [2, 2, 1]>,
InstrItinData<IIC_iStoresi , [InstrStage<2, [V6_Pipe]>], [2, 2, 1]>,
//
// Immediate offset with update
InstrItinData<IIC_iStoreiu , [InstrStage<1, [FU_Pipe0]>], [2, 2, 1]>,
InstrItinData<IIC_iStoreiu , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
//
// Register offset with update
InstrItinData<IIC_iStoreru , [InstrStage<1, [FU_Pipe0]>], [2, 2, 1, 1]>,
InstrItinData<IIC_iStoreru , [InstrStage<1, [V6_Pipe]>], [2, 2, 1, 1]>,
//
// Scaled register offset with update, issues over 2 cycles
InstrItinData<IIC_iStoresiu, [InstrStage<2, [FU_Pipe0]>], [2, 2, 2, 1]>,
InstrItinData<IIC_iStoresiu, [InstrStage<2, [V6_Pipe]>], [2, 2, 2, 1]>,
//
// Store multiple
InstrItinData<IIC_iStorem , [InstrStage<3, [FU_Pipe0]>]>,
InstrItinData<IIC_iStorem , [InstrStage<3, [V6_Pipe]>]>,
// Branch
//
// no delay slots, so the latency of a branch is unimportant
InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0]>]>,
InstrItinData<IIC_Br , [InstrStage<1, [V6_Pipe]>]>,
// VFP
// Issue through integer pipeline, and execute in NEON unit. We assume
@ -117,84 +121,84 @@ def ARMV6Itineraries : ProcessorItineraries<[
// possible.
//
// FP Special Register to Integer Register File Move
InstrItinData<IIC_fpSTAT , [InstrStage<1, [FU_Pipe0]>], [3]>,
InstrItinData<IIC_fpSTAT , [InstrStage<1, [V6_Pipe]>], [3]>,
//
// Single-precision FP Unary
InstrItinData<IIC_fpUNA32 , [InstrStage<1, [FU_Pipe0]>], [5, 2]>,
InstrItinData<IIC_fpUNA32 , [InstrStage<1, [V6_Pipe]>], [5, 2]>,
//
// Double-precision FP Unary
InstrItinData<IIC_fpUNA64 , [InstrStage<1, [FU_Pipe0]>], [5, 2]>,
InstrItinData<IIC_fpUNA64 , [InstrStage<1, [V6_Pipe]>], [5, 2]>,
//
// Single-precision FP Compare
InstrItinData<IIC_fpCMP32 , [InstrStage<1, [FU_Pipe0]>], [2, 2]>,
InstrItinData<IIC_fpCMP32 , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
//
// Double-precision FP Compare
InstrItinData<IIC_fpCMP64 , [InstrStage<1, [FU_Pipe0]>], [2, 2]>,
InstrItinData<IIC_fpCMP64 , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
//
// Single to Double FP Convert
InstrItinData<IIC_fpCVTSD , [InstrStage<1, [FU_Pipe0]>], [5, 2]>,
InstrItinData<IIC_fpCVTSD , [InstrStage<1, [V6_Pipe]>], [5, 2]>,
//
// Double to Single FP Convert
InstrItinData<IIC_fpCVTDS , [InstrStage<1, [FU_Pipe0]>], [5, 2]>,
InstrItinData<IIC_fpCVTDS , [InstrStage<1, [V6_Pipe]>], [5, 2]>,
//
// Single-Precision FP to Integer Convert
InstrItinData<IIC_fpCVTSI , [InstrStage<1, [FU_Pipe0]>], [9, 2]>,
InstrItinData<IIC_fpCVTSI , [InstrStage<1, [V6_Pipe]>], [9, 2]>,
//
// Double-Precision FP to Integer Convert
InstrItinData<IIC_fpCVTDI , [InstrStage<1, [FU_Pipe0]>], [9, 2]>,
InstrItinData<IIC_fpCVTDI , [InstrStage<1, [V6_Pipe]>], [9, 2]>,
//
// Integer to Single-Precision FP Convert
InstrItinData<IIC_fpCVTIS , [InstrStage<1, [FU_Pipe0]>], [9, 2]>,
InstrItinData<IIC_fpCVTIS , [InstrStage<1, [V6_Pipe]>], [9, 2]>,
//
// Integer to Double-Precision FP Convert
InstrItinData<IIC_fpCVTID , [InstrStage<1, [FU_Pipe0]>], [9, 2]>,
InstrItinData<IIC_fpCVTID , [InstrStage<1, [V6_Pipe]>], [9, 2]>,
//
// Single-precision FP ALU
InstrItinData<IIC_fpALU32 , [InstrStage<1, [FU_Pipe0]>], [9, 2, 2]>,
InstrItinData<IIC_fpALU32 , [InstrStage<1, [V6_Pipe]>], [9, 2, 2]>,
//
// Double-precision FP ALU
InstrItinData<IIC_fpALU64 , [InstrStage<1, [FU_Pipe0]>], [9, 2, 2]>,
InstrItinData<IIC_fpALU64 , [InstrStage<1, [V6_Pipe]>], [9, 2, 2]>,
//
// Single-precision FP Multiply
InstrItinData<IIC_fpMUL32 , [InstrStage<1, [FU_Pipe0]>], [9, 2, 2]>,
InstrItinData<IIC_fpMUL32 , [InstrStage<1, [V6_Pipe]>], [9, 2, 2]>,
//
// Double-precision FP Multiply
InstrItinData<IIC_fpMUL64 , [InstrStage<2, [FU_Pipe0]>], [9, 2, 2]>,
InstrItinData<IIC_fpMUL64 , [InstrStage<2, [V6_Pipe]>], [9, 2, 2]>,
//
// Single-precision FP MAC
InstrItinData<IIC_fpMAC32 , [InstrStage<1, [FU_Pipe0]>], [9, 2, 2, 2]>,
InstrItinData<IIC_fpMAC32 , [InstrStage<1, [V6_Pipe]>], [9, 2, 2, 2]>,
//
// Double-precision FP MAC
InstrItinData<IIC_fpMAC64 , [InstrStage<2, [FU_Pipe0]>], [9, 2, 2, 2]>,
InstrItinData<IIC_fpMAC64 , [InstrStage<2, [V6_Pipe]>], [9, 2, 2, 2]>,
//
// Single-precision FP DIV
InstrItinData<IIC_fpDIV32 , [InstrStage<15, [FU_Pipe0]>], [20, 2, 2]>,
InstrItinData<IIC_fpDIV32 , [InstrStage<15, [V6_Pipe]>], [20, 2, 2]>,
//
// Double-precision FP DIV
InstrItinData<IIC_fpDIV64 , [InstrStage<29, [FU_Pipe0]>], [34, 2, 2]>,
InstrItinData<IIC_fpDIV64 , [InstrStage<29, [V6_Pipe]>], [34, 2, 2]>,
//
// Single-precision FP SQRT
InstrItinData<IIC_fpSQRT32 , [InstrStage<15, [FU_Pipe0]>], [20, 2, 2]>,
InstrItinData<IIC_fpSQRT32 , [InstrStage<15, [V6_Pipe]>], [20, 2, 2]>,
//
// Double-precision FP SQRT
InstrItinData<IIC_fpSQRT64 , [InstrStage<29, [FU_Pipe0]>], [34, 2, 2]>,
InstrItinData<IIC_fpSQRT64 , [InstrStage<29, [V6_Pipe]>], [34, 2, 2]>,
//
// Single-precision FP Load
InstrItinData<IIC_fpLoad32 , [InstrStage<1, [FU_Pipe0]>], [5, 2, 2]>,
InstrItinData<IIC_fpLoad32 , [InstrStage<1, [V6_Pipe]>], [5, 2, 2]>,
//
// Double-precision FP Load
InstrItinData<IIC_fpLoad64 , [InstrStage<1, [FU_Pipe0]>], [5, 2, 2]>,
InstrItinData<IIC_fpLoad64 , [InstrStage<1, [V6_Pipe]>], [5, 2, 2]>,
//
// FP Load Multiple
InstrItinData<IIC_fpLoadm , [InstrStage<3, [FU_Pipe0]>]>,
InstrItinData<IIC_fpLoadm , [InstrStage<3, [V6_Pipe]>]>,
//
// Single-precision FP Store
InstrItinData<IIC_fpStore32 , [InstrStage<1, [FU_Pipe0]>], [2, 2, 2]>,
InstrItinData<IIC_fpStore32 , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>,
//
// Double-precision FP Store
// use FU_Issue to enforce the 1 load/store per cycle limit
InstrItinData<IIC_fpStore64 , [InstrStage<1, [FU_Pipe0]>], [2, 2, 2]>,
InstrItinData<IIC_fpStore64 , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>,
//
// FP Store Multiple
InstrItinData<IIC_fpStorem , [InstrStage<3, [FU_Pipe0]>]>
InstrItinData<IIC_fpStorem , [InstrStage<3, [V6_Pipe]>]>
]>;

View File

@ -53,7 +53,8 @@ def s_pseudo : InstrItinClass;
//Table 2­4 Instruction Class Latency in Cycles
//modified some
def Alpha21264Itineraries : ProcessorItineraries<[
def Alpha21264Itineraries : ProcessorItineraries<
[L0, L1, FST0, FST1, U0, U1, FA, FM], [
InstrItinData<s_ild , [InstrStage<3, [L0, L1]>]>,
InstrItinData<s_fld , [InstrStage<4, [L0, L1]>]>,
InstrItinData<s_ist , [InstrStage<0, [L0, L1]>]>,

View File

@ -36,7 +36,7 @@ def RotateShift : InstrItinClass; // EVEN_UNIT
def ImmLoad : InstrItinClass; // EVEN_UNIT
/* Note: The itinerary for the Cell SPU is somewhat contrived... */
def SPUItineraries : ProcessorItineraries<[
def SPUItineraries : ProcessorItineraries<[ODD_UNIT, EVEN_UNIT], [
InstrItinData<LoadStore , [InstrStage<6, [ODD_UNIT]>]>,
InstrItinData<BranchHints , [InstrStage<6, [ODD_UNIT]>]>,
InstrItinData<BranchResolv, [InstrStage<4, [ODD_UNIT]>]>,

View File

@ -40,7 +40,7 @@ def IIPseudo : InstrItinClass;
//===----------------------------------------------------------------------===//
// Mips Generic instruction itineraries.
//===----------------------------------------------------------------------===//
def MipsGenericItineraries : ProcessorItineraries<[
def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [
InstrItinData<IIAlu , [InstrStage<1, [ALU]>]>,
InstrItinData<IILoad , [InstrStage<3, [ALU]>]>,
InstrItinData<IIStore , [InstrStage<1, [ALU]>]>,

View File

@ -15,8 +15,6 @@ def SLU : FuncUnit; // Store/load unit
def SRU : FuncUnit; // special register unit
def IU1 : FuncUnit; // integer unit 1 (simple)
def IU2 : FuncUnit; // integer unit 2 (complex)
def IU3 : FuncUnit; // integer unit 3 (7450 simple)
def IU4 : FuncUnit; // integer unit 4 (7450 simple)
def FPU1 : FuncUnit; // floating point unit 1
def FPU2 : FuncUnit; // floating point unit 2
def VPU : FuncUnit; // vector permutation unit
@ -24,7 +22,6 @@ def VIU1 : FuncUnit; // vector integer unit 1 (simple)
def VIU2 : FuncUnit; // vector integer unit 2 (complex)
def VFPU : FuncUnit; // vector floating point unit
//===----------------------------------------------------------------------===//
// Instruction Itinerary classes used for PowerPC
//

View File

@ -12,7 +12,8 @@
//===----------------------------------------------------------------------===//
def G3Itineraries : ProcessorItineraries<[
def G3Itineraries : ProcessorItineraries<
[IU1, IU2, FPU1, BPU, SRU, SLU], [
InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2]>]>,
InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2]>]>,
InstrItinData<IntDivW , [InstrStage<19, [IU1]>]>,

View File

@ -11,7 +11,8 @@
//
//===----------------------------------------------------------------------===//
def G4Itineraries : ProcessorItineraries<[
def G4Itineraries : ProcessorItineraries<
[IU1, IU2, SLU, SRU, BPU, FPU1, VIU1, VIU2, VPU, VFPU], [
InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2]>]>,
InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2]>]>,
InstrItinData<IntDivW , [InstrStage<19, [IU1]>]>,

View File

@ -11,7 +11,11 @@
//
//===----------------------------------------------------------------------===//
def G4PlusItineraries : ProcessorItineraries<[
def IU3 : FuncUnit; // integer unit 3 (7450 simple)
def IU4 : FuncUnit; // integer unit 4 (7450 simple)
def G4PlusItineraries : ProcessorItineraries<
[IU1, IU2, IU3, IU4, BPU, SLU, FPU1, VFPU, VIU1, VIU2, VPU], [
InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
InstrItinData<IntDivW , [InstrStage<23, [IU2]>]>,

View File

@ -11,7 +11,8 @@
//
//===----------------------------------------------------------------------===//
def G5Itineraries : ProcessorItineraries<[
def G5Itineraries : ProcessorItineraries<
[IU1, IU2, SLU, BPU, FPU1, FPU2, VFPU, VIU1, VIU2, VPU], [
InstrItinData<IntGeneral , [InstrStage<2, [IU1, IU2]>]>,
InstrItinData<IntCompare , [InstrStage<3, [IU1, IU2]>]>,
InstrItinData<IntDivD , [InstrStage<68, [IU1]>]>,

View File

@ -203,7 +203,8 @@ unsigned SubtargetEmitter::CollectAllItinClasses(raw_ostream &OS,
// data initialization for the specified itinerary. N is the number
// of stages.
//
void SubtargetEmitter::FormItineraryStageString(Record *ItinData,
void SubtargetEmitter::FormItineraryStageString(const std::string &Name,
Record *ItinData,
std::string &ItinString,
unsigned &NStages) {
// Get states list
@ -226,7 +227,7 @@ void SubtargetEmitter::FormItineraryStageString(Record *ItinData,
// For each unit
for (unsigned j = 0, M = UnitList.size(); j < M;) {
// Add name and bitwise or
ItinString += UnitList[j]->getName();
ItinString += Name + "FU::" + UnitList[j]->getName();
if (++j < M) ItinString += " | ";
}
@ -279,8 +280,28 @@ void SubtargetEmitter::EmitStageAndOperandCycleData(raw_ostream &OS,
// If just no itinerary then don't bother
if (ProcItinList.size() < 2) return;
// Emit functional units for all the itineraries.
for (unsigned i = 0, N = ProcItinList.size(); i < N; ++i) {
// Next record
Record *Proc = ProcItinList[i];
std::vector<Record*> FUs = Proc->getValueAsListOfDefs("FU");
if (FUs.empty())
continue;
const std::string &Name = Proc->getName();
OS << "\n// Functional units for itineraries \"" << Name << "\"\n"
<< "namespace " << Name << "FU {\n";
for (unsigned j = 0, FUN = FUs.size(); j < FUN; ++j)
OS << " const unsigned " << FUs[j]->getName()
<< " = 1 << " << j << ";\n";
OS << "}\n";
}
// Begin stages table
std::string StageTable = "static const llvm::InstrStage Stages[] = {\n";
std::string StageTable = "\nstatic const llvm::InstrStage Stages[] = {\n";
StageTable += " { 0, 0, 0, llvm::InstrStage::Required }, // No itinerary\n";
// Begin operand cycle table
@ -315,7 +336,7 @@ void SubtargetEmitter::EmitStageAndOperandCycleData(raw_ostream &OS,
// Get string and stage count
std::string ItinStageString;
unsigned NStages;
FormItineraryStageString(ItinData, ItinStageString, NStages);
FormItineraryStageString(Name, ItinData, ItinStageString, NStages);
// Get string and operand cycle count
std::string ItinOperandCycleString;
@ -567,9 +588,9 @@ void SubtargetEmitter::run(raw_ostream &OS) {
OS << "#include \"llvm/Support/raw_ostream.h\"\n";
OS << "#include \"llvm/Target/SubtargetFeature.h\"\n";
OS << "#include \"llvm/Target/TargetInstrItineraries.h\"\n\n";
Enumeration(OS, "FuncUnit", true);
OS<<"\n";
// Enumeration(OS, "FuncUnit", true);
// OS<<"\n";
// Enumeration(OS, "InstrItinClass", false);
// OS<<"\n";
Enumeration(OS, "SubtargetFeature", true);

View File

@ -34,7 +34,8 @@ class SubtargetEmitter : public TableGenBackend {
void CPUKeyValues(raw_ostream &OS);
unsigned CollectAllItinClasses(raw_ostream &OS,
std::map<std::string, unsigned> &ItinClassesMap);
void FormItineraryStageString(Record *ItinData, std::string &ItinString,
void FormItineraryStageString(const std::string &Names,
Record *ItinData, std::string &ItinString,
unsigned &NStages);
void FormItineraryOperandCycleString(Record *ItinData, std::string &ItinString,
unsigned &NOperandCycles);