Define new itinerary classes for ARM <-> VFP register moves to distinguish them from NEON ops. Define a proper scheduling itinerary for them on Cortex-A9. The Cortex-A8 TRM does not specify latency for them at all :(

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@100650 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Anton Korobeynikov
2010-04-07 18:20:02 +00:00
parent 63401e33cb
commit a31c6fb65e
3 changed files with 38 additions and 7 deletions

View File

@@ -306,23 +306,23 @@ def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0, (outs SPR:$dst), (ins SPR:$a),
// //
def VMOVRS : AVConv2I<0b11100001, 0b1010, (outs GPR:$dst), (ins SPR:$src), def VMOVRS : AVConv2I<0b11100001, 0b1010, (outs GPR:$dst), (ins SPR:$src),
IIC_VMOVSI, "vmov", "\t$dst, $src", IIC_fpMOVSI, "vmov", "\t$dst, $src",
[(set GPR:$dst, (bitconvert SPR:$src))]>; [(set GPR:$dst, (bitconvert SPR:$src))]>;
def VMOVSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$dst), (ins GPR:$src), def VMOVSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$dst), (ins GPR:$src),
IIC_VMOVIS, "vmov", "\t$dst, $src", IIC_fpMOVIS, "vmov", "\t$dst, $src",
[(set SPR:$dst, (bitconvert GPR:$src))]>; [(set SPR:$dst, (bitconvert GPR:$src))]>;
def VMOVRRD : AVConv3I<0b11000101, 0b1011, def VMOVRRD : AVConv3I<0b11000101, 0b1011,
(outs GPR:$wb, GPR:$dst2), (ins DPR:$src), (outs GPR:$wb, GPR:$dst2), (ins DPR:$src),
IIC_VMOVDI, "vmov", "\t$wb, $dst2, $src", IIC_fpMOVDI, "vmov", "\t$wb, $dst2, $src",
[/* FIXME: Can't write pattern for multiple result instr*/]> { [/* FIXME: Can't write pattern for multiple result instr*/]> {
let Inst{7-6} = 0b00; let Inst{7-6} = 0b00;
} }
def VMOVRRS : AVConv3I<0b11000101, 0b1010, def VMOVRRS : AVConv3I<0b11000101, 0b1010,
(outs GPR:$wb, GPR:$dst2), (ins SPR:$src1, SPR:$src2), (outs GPR:$wb, GPR:$dst2), (ins SPR:$src1, SPR:$src2),
IIC_VMOVDI, "vmov", "\t$wb, $dst2, $src1, $src2", IIC_fpMOVDI, "vmov", "\t$wb, $dst2, $src1, $src2",
[/* For disassembly only; pattern left blank */]> { [/* For disassembly only; pattern left blank */]> {
let Inst{7-6} = 0b00; let Inst{7-6} = 0b00;
} }
@@ -332,14 +332,14 @@ def VMOVRRS : AVConv3I<0b11000101, 0b1010,
def VMOVDRR : AVConv5I<0b11000100, 0b1011, def VMOVDRR : AVConv5I<0b11000100, 0b1011,
(outs DPR:$dst), (ins GPR:$src1, GPR:$src2), (outs DPR:$dst), (ins GPR:$src1, GPR:$src2),
IIC_VMOVID, "vmov", "\t$dst, $src1, $src2", IIC_fpMOVID, "vmov", "\t$dst, $src1, $src2",
[(set DPR:$dst, (arm_fmdrr GPR:$src1, GPR:$src2))]> { [(set DPR:$dst, (arm_fmdrr GPR:$src1, GPR:$src2))]> {
let Inst{7-6} = 0b00; let Inst{7-6} = 0b00;
} }
def VMOVSRR : AVConv5I<0b11000100, 0b1010, def VMOVSRR : AVConv5I<0b11000100, 0b1010,
(outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2), (outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2),
IIC_VMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2", IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2",
[/* For disassembly only; pattern left blank */]> { [/* For disassembly only; pattern left blank */]> {
let Inst{7-6} = 0b00; let Inst{7-6} = 0b00;
} }

View File

@@ -77,6 +77,10 @@ def IIC_fpCVTIS : InstrItinClass;
def IIC_fpCVTID : InstrItinClass; def IIC_fpCVTID : InstrItinClass;
def IIC_fpCVTSI : InstrItinClass; def IIC_fpCVTSI : InstrItinClass;
def IIC_fpCVTDI : InstrItinClass; def IIC_fpCVTDI : InstrItinClass;
def IIC_fpMOVIS : InstrItinClass;
def IIC_fpMOVID : InstrItinClass;
def IIC_fpMOVSI : InstrItinClass;
def IIC_fpMOVDI : InstrItinClass;
def IIC_fpALU32 : InstrItinClass; def IIC_fpALU32 : InstrItinClass;
def IIC_fpALU64 : InstrItinClass; def IIC_fpALU64 : InstrItinClass;
def IIC_fpMUL32 : InstrItinClass; def IIC_fpMUL32 : InstrItinClass;

View File

@@ -753,7 +753,34 @@ def CortexA9Itineraries : ProcessorItineraries<[
InstrItinData<IIC_fpSQRT64, [InstrStage2<1, [FU_DRegsVFP], 0, Required>, InstrItinData<IIC_fpSQRT64, [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
InstrStage2<33, [FU_DRegsN], 0, Reserved>, InstrStage2<33, [FU_DRegsN], 0, Reserved>,
InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<28, [FU_NPipe]>], [32, 1]> InstrStage<28, [FU_NPipe]>], [32, 1]>,
//
// Integer to Single-precision Move
InstrItinData<IIC_fpMOVIS, [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
// One extra cycle of latency, since writeback (wbck) takes 2 cycles
InstrStage2<3, [FU_DRegsN], 0, Reserved>,
InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_NPipe]>], [1, 1]>,
//
// Integer to Double-precision Move
InstrItinData<IIC_fpMOVID, [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
// One extra cycle of latency, since writeback (wbck) takes 2 cycles
InstrStage2<3, [FU_DRegsN], 0, Reserved>,
InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_NPipe]>], [1, 1, 1]>,
//
// Single-precision to Integer Move
InstrItinData<IIC_fpMOVSI, [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
InstrStage2<2, [FU_DRegsN], 0, Reserved>,
InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_NPipe]>], [1, 1]>,
//
// Double-precision to Integer Move
InstrItinData<IIC_fpMOVDI, [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
InstrStage2<2, [FU_DRegsN], 0, Reserved>,
InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_NPipe]>], [1, 1, 1]>
]>; ]>;