// Patch summary (preamble text placed before the first "diff --git" header, outside every hunk).
//
// Files touched:
//   lib/Target/ARM/ARMInstrNEON.td
//   lib/Target/ARM/ARMScheduleV7.td
//
// Hunk 1 (ARMInstrNEON.td, @@ -2513,8 +2513,8 @@):
//   Rewrites the VPADDf definition so that the argument following N3RegFrm is
//   IIC_VBIND instead of IIC_VSHLD. Every other visible argument of the def is
//   unchanged; only the line wrapping differs.
//
// Hunk 2 (ARMScheduleV7.td, @@ -793,6 +793,34 @@, hunk context CortexA9Itineraries):
//   Adds four InstrItinData entries ahead of the existing
//   "Double-register Integer Binary" entry. Their headings are:
//     - Double-register Integer Unary        (trailing list [4, 2])
//     - Quad-register Integer Unary          (trailing list [4, 2])
//     - Double-register Integer Q-Unary      (trailing list [4, 1])
//     - Quad-register Integer CountQ-Unary   (trailing list [4, 1])
//   Every added entry lists the same three visible stages:
//     InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
//     InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
//     InstrStage<1, [FU_NPipe]>
//
// NOTE(review): in this copy each added "InstrItinData" is followed directly by
//   a comma, with no '<' argument list. The leading arguments look like they
//   were stripped in transit -- restore them from the original patch.
// NOTE(review): the patch text is collapsed onto a single line and stops at the
//   final "InstrItinData,", so it cannot be applied as-is.
// NOTE(review): the heading "CountQ-Unary" breaks the Unary / Q-Unary naming
//   used by the other three headings; presumably "Q-Unary" was meant -- confirm.
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index d71e1134ae1..5199a44a0f5 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -2513,8 +2513,8 @@ def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD, def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD, "vpadd", "i32", v2i32, v2i32, int_arm_neon_vpadd, 0>; -def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm, IIC_VSHLD, - "vpadd", "f32", +def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm, + IIC_VBIND, "vpadd", "f32", v2f32, v2f32, int_arm_neon_vpadd, 0>; // VPADDL : Vector Pairwise Add Long diff --git a/lib/Target/ARM/ARMScheduleV7.td b/lib/Target/ARM/ARMScheduleV7.td index 90635ce1fa3..ed900f781ee 100644 --- a/lib/Target/ARM/ARMScheduleV7.td +++ b/lib/Target/ARM/ARMScheduleV7.td @@ -793,6 +793,34 @@ def CortexA9Itineraries : ProcessorItineraries<[ // NEON // Issue through integer pipeline, and execute in NEON unit. + // + // Double-register Integer Unary + InstrItinData, + // Extra 3 latency cycle since wbck is 6 cycles + InstrStage2<7, [FU_DRegsVFP], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_NPipe]>], [4, 2]>, + // + // Quad-register Integer Unary + InstrItinData, + // Extra 3 latency cycle since wbck is 6 cycles + InstrStage2<7, [FU_DRegsVFP], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_NPipe]>], [4, 2]>, + // + // Double-register Integer Q-Unary + InstrItinData, + // Extra 3 latency cycle since wbck is 6 cycles + InstrStage2<7, [FU_DRegsVFP], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_NPipe]>], [4, 1]>, + // + // Quad-register Integer CountQ-Unary + InstrItinData, + // Extra 3 latency cycle since wbck is 6 cycles + InstrStage2<7, [FU_DRegsVFP], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_NPipe]>], [4, 1]>, // // Double-register Integer Binary InstrItinData,