diff --git a/lib/Target/ARM/ARMScheduleV7.td b/lib/Target/ARM/ARMScheduleV7.td index 7bd8811bf8c..39900a5d810 100644 --- a/lib/Target/ARM/ARMScheduleV7.td +++ b/lib/Target/ARM/ARMScheduleV7.td @@ -937,7 +937,65 @@ def CortexA9Itineraries : ProcessorItineraries<[ // Extra 3 latency cycle since wbck is 6 cycles InstrStage2<7, [FU_DRegsVFP], 0, Reserved>, InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [6, 3, 1]> + InstrStage<2, [FU_NPipe]>], [6, 3, 1]>, + + // + // Double-register Integer Multiply (.8, .16) + InstrItinData, + // Extra 3 latency cycle since wbck is 6 cycles + InstrStage2<7, [FU_DRegsVFP], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_NPipe]>], [6, 2, 2]>, + // + // Quad-register Integer Multiply (.8, .16) + InstrItinData, + // Extra 3 latency cycle since wbck is 7 cycles + InstrStage2<8, [FU_DRegsVFP], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<2, [FU_NPipe]>], [7, 2, 2]>, + + // + // Double-register Integer Multiply (.32) + InstrItinData, + // Extra 3 latency cycle since wbck is 7 cycles + InstrStage2<8, [FU_DRegsVFP], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<2, [FU_NPipe]>], [7, 2, 1]>, + // + // Quad-register Integer Multiply (.32) + InstrItinData, + // Extra 3 latency cycle since wbck is 9 cycles + InstrStage2<10, [FU_DRegsVFP], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<4, [FU_NPipe]>], [9, 2, 1]>, + // + // Double-register Integer Multiply-Accumulate (.8, .16) + InstrItinData, + // Extra 3 latency cycle since wbck is 6 cycles + InstrStage2<7, [FU_DRegsVFP], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_NPipe]>], [6, 3, 2, 2]>, + // + // Double-register Integer Multiply-Accumulate (.32) + InstrItinData, + // Extra 3 latency cycle since wbck is 7 cycles + InstrStage2<8, [FU_DRegsVFP], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<2, [FU_NPipe]>], [7, 3, 2, 1]>, + // + // Quad-register Integer Multiply-Accumulate (.8, .16) + InstrItinData, + // Extra 3 latency cycle since wbck is 7 cycles + InstrStage2<8, [FU_DRegsVFP], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<2, [FU_NPipe]>], [7, 3, 2, 2]>, + // + // Quad-register Integer Multiply-Accumulate (.32) + InstrItinData, + // Extra 3 latency cycle since wbck is 9 cycles + InstrStage2<8, [FU_DRegsVFP], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<4, [FU_NPipe]>], [9, 3, 2, 1]> ]>;