diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index f7c16bb9c22..a476df0b2b4 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -36,20 +36,20 @@ def arm_fmdrr : SDNode<"ARMISD::FMDRR", SDT_FMDRR>; let canFoldAsLoad = 1 in { def FLDD : ADI5<0b1101, 0b01, (outs DPR:$dst), (ins addrmode5:$addr), - IIC_fpLoad, "fldd", " $dst, $addr", + IIC_fpLoad64, "fldd", " $dst, $addr", [(set DPR:$dst, (load addrmode5:$addr))]>; def FLDS : ASI5<0b1101, 0b01, (outs SPR:$dst), (ins addrmode5:$addr), - IIC_fpLoad, "flds", " $dst, $addr", + IIC_fpLoad32, "flds", " $dst, $addr", [(set SPR:$dst, (load addrmode5:$addr))]>; } // canFoldAsLoad def FSTD : ADI5<0b1101, 0b00, (outs), (ins DPR:$src, addrmode5:$addr), - IIC_fpStore, "fstd", " $src, $addr", + IIC_fpStore64, "fstd", " $src, $addr", [(store DPR:$src, addrmode5:$addr)]>; def FSTS : ASI5<0b1101, 0b00, (outs), (ins SPR:$src, addrmode5:$addr), - IIC_fpStore, "fsts", " $src, $addr", + IIC_fpStore32, "fsts", " $src, $addr", [(store SPR:$src, addrmode5:$addr)]>; //===----------------------------------------------------------------------===// @@ -58,14 +58,14 @@ def FSTS : ASI5<0b1101, 0b00, (outs), (ins SPR:$src, addrmode5:$addr), let mayLoad = 1 in { def FLDMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dst1, - variable_ops), IIC_fpLoad, + variable_ops), IIC_fpLoadm, "fldm${addr:submode}d${p} ${addr:base}, $dst1", []> { let Inst{20} = 1; } def FLDMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dst1, - variable_ops), IIC_fpLoad, + variable_ops), IIC_fpLoadm, "fldm${addr:submode}s${p} ${addr:base}, $dst1", []> { let Inst{20} = 1; @@ -74,14 +74,14 @@ def FLDMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dst1, let mayStore = 1 in { def FSTMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$src1, - variable_ops), IIC_fpStore, + variable_ops), IIC_fpStorem, "fstm${addr:submode}d${p} ${addr:base}, $src1", []> { let Inst{20} = 0; } def FSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$src1, - variable_ops), IIC_fpStore, + variable_ops), IIC_fpStorem, "fstm${addr:submode}s${p} ${addr:base}, $src1", []> { let Inst{20} = 0; @@ -95,48 +95,48 @@ def FSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$src1, // def FADDD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b), - IIC_fpALU, "faddd", " $dst, $a, $b", + IIC_fpALU64, "faddd", " $dst, $a, $b", [(set DPR:$dst, (fadd DPR:$a, DPR:$b))]>; def FADDS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b), - IIC_fpALU, "fadds", " $dst, $a, $b", + IIC_fpALU32, "fadds", " $dst, $a, $b", [(set SPR:$dst, (fadd SPR:$a, SPR:$b))]>; // These are encoded as unary instructions. let Defs = [FPSCR] in { def FCMPED : ADuI<0b11101011, 0b0100, 0b1100, (outs), (ins DPR:$a, DPR:$b), - IIC_fpALU, "fcmped", " $a, $b", + IIC_fpCMP64, "fcmped", " $a, $b", [(arm_cmpfp DPR:$a, DPR:$b)]>; def FCMPES : ASuI<0b11101011, 0b0100, 0b1100, (outs), (ins SPR:$a, SPR:$b), - IIC_fpALU, "fcmpes", " $a, $b", + IIC_fpCMP32, "fcmpes", " $a, $b", [(arm_cmpfp SPR:$a, SPR:$b)]>; } def FDIVD : ADbI<0b11101000, (outs DPR:$dst), (ins DPR:$a, DPR:$b), - IIC_fpALU, "fdivd", " $dst, $a, $b", + IIC_fpDIV64, "fdivd", " $dst, $a, $b", [(set DPR:$dst, (fdiv DPR:$a, DPR:$b))]>; def FDIVS : ASbI<0b11101000, (outs SPR:$dst), (ins SPR:$a, SPR:$b), - IIC_fpALU, "fdivs", " $dst, $a, $b", + IIC_fpDIV32, "fdivs", " $dst, $a, $b", [(set SPR:$dst, (fdiv SPR:$a, SPR:$b))]>; def FMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b), - IIC_fpALU, "fmuld", " $dst, $a, $b", + IIC_fpMUL64, "fmuld", " $dst, $a, $b", [(set DPR:$dst, (fmul DPR:$a, DPR:$b))]>; def FMULS : ASbIn<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b), - IIC_fpALU, "fmuls", " $dst, $a, $b", + IIC_fpMUL32, "fmuls", " $dst, $a, $b", [(set SPR:$dst, (fmul SPR:$a, SPR:$b))]>; def FNMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b), - IIC_fpALU, "fnmuld", " $dst, $a, $b", + IIC_fpMUL64, "fnmuld", " $dst, $a, $b", [(set DPR:$dst, (fneg (fmul DPR:$a, DPR:$b)))]> { let Inst{6} = 1; } def FNMULS : ASbI<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b), - IIC_fpALU, "fnmuls", " $dst, $a, $b", + IIC_fpMUL32, "fnmuls", " $dst, $a, $b", [(set SPR:$dst, (fneg (fmul SPR:$a, SPR:$b)))]> { let Inst{6} = 1; } @@ -149,13 +149,13 @@ def : Pat<(fmul (fneg SPR:$a), SPR:$b), def FSUBD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b), - IIC_fpALU, "fsubd", " $dst, $a, $b", + IIC_fpALU64, "fsubd", " $dst, $a, $b", [(set DPR:$dst, (fsub DPR:$a, DPR:$b))]> { let Inst{6} = 1; } def FSUBS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b), - IIC_fpALU, "fsubs", " $dst, $a, $b", + IIC_fpALU32, "fsubs", " $dst, $a, $b", [(set SPR:$dst, (fsub SPR:$a, SPR:$b))]> { let Inst{6} = 1; } @@ -165,30 +165,30 @@ def FSUBS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b), // def FABSD : ADuI<0b11101011, 0b0000, 0b1100, (outs DPR:$dst), (ins DPR:$a), - IIC_fpALU, "fabsd", " $dst, $a", + IIC_fpUNA64, "fabsd", " $dst, $a", [(set DPR:$dst, (fabs DPR:$a))]>; def FABSS : ASuIn<0b11101011, 0b0000, 0b1100, (outs SPR:$dst), (ins SPR:$a), - IIC_fpALU, "fabss", " $dst, $a", + IIC_fpUNA32, "fabss", " $dst, $a", [(set SPR:$dst, (fabs SPR:$a))]>; let Defs = [FPSCR] in { def FCMPEZD : ADuI<0b11101011, 0b0101, 0b1100, (outs), (ins DPR:$a), - IIC_fpALU, "fcmpezd", " $a", + IIC_fpCMP64, "fcmpezd", " $a", [(arm_cmpfp0 DPR:$a)]>; def FCMPEZS : ASuI<0b11101011, 0b0101, 0b1100, (outs), (ins SPR:$a), - IIC_fpALU, "fcmpezs", " $a", + IIC_fpCMP32, "fcmpezs", " $a", [(arm_cmpfp0 SPR:$a)]>; } def FCVTDS : ASuI<0b11101011, 0b0111, 0b1100, (outs DPR:$dst), (ins SPR:$a), - IIC_fpALU, "fcvtds", " $dst, $a", + IIC_fpCVTDS, "fcvtds", " $dst, $a", [(set DPR:$dst, (fextend SPR:$a))]>; // Special case encoding: bits 11-8 is 0b1011. def FCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm, - IIC_fpALU, "fcvtsd", " $dst, $a", + IIC_fpCVTSD, "fcvtsd", " $dst, $a", [(set SPR:$dst, (fround DPR:$a))]> { let Inst{27-23} = 0b11101; let Inst{21-16} = 0b110111; @@ -198,26 +198,26 @@ def FCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm, let neverHasSideEffects = 1 in { def FCPYD : ADuI<0b11101011, 0b0000, 0b0100, (outs DPR:$dst), (ins DPR:$a), - IIC_fpALU, "fcpyd", " $dst, $a", []>; + IIC_fpUNA64, "fcpyd", " $dst, $a", []>; def FCPYS : ASuI<0b11101011, 0b0000, 0b0100, (outs SPR:$dst), (ins SPR:$a), - IIC_fpALU, "fcpys", " $dst, $a", []>; + IIC_fpUNA32, "fcpys", " $dst, $a", []>; } // neverHasSideEffects def FNEGD : ADuI<0b11101011, 0b0001, 0b0100, (outs DPR:$dst), (ins DPR:$a), - IIC_fpALU, "fnegd", " $dst, $a", + IIC_fpUNA64, "fnegd", " $dst, $a", [(set DPR:$dst, (fneg DPR:$a))]>; def FNEGS : ASuIn<0b11101011, 0b0001, 0b0100, (outs SPR:$dst), (ins SPR:$a), - IIC_fpALU, "fnegs", " $dst, $a", + IIC_fpUNA32, "fnegs", " $dst, $a", [(set SPR:$dst, (fneg SPR:$a))]>; def FSQRTD : ADuI<0b11101011, 0b0001, 0b1100, (outs DPR:$dst), (ins DPR:$a), - IIC_fpALU, "fsqrtd", " $dst, $a", + IIC_fpSQRT64, "fsqrtd", " $dst, $a", [(set DPR:$dst, (fsqrt DPR:$a))]>; def FSQRTS : ASuI<0b11101011, 0b0001, 0b1100, (outs SPR:$dst), (ins SPR:$a), - IIC_fpALU, "fsqrts", " $dst, $a", + IIC_fpSQRT32, "fsqrts", " $dst, $a", [(set SPR:$dst, (fsqrt SPR:$a))]>; //===----------------------------------------------------------------------===// @@ -225,16 +225,16 @@ def FSQRTS : ASuI<0b11101011, 0b0001, 0b1100, (outs SPR:$dst), (ins SPR:$a), // def FMRS : AVConv2I<0b11100001, 0b1010, (outs GPR:$dst), (ins SPR:$src), - IIC_fpALU, "fmrs", " $dst, $src", + IIC_fpMOVSI, "fmrs", " $dst, $src", [(set GPR:$dst, (bitconvert SPR:$src))]>; def FMSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$dst), (ins GPR:$src), - IIC_fpALU, "fmsr", " $dst, $src", + IIC_fpMOVIS, "fmsr", " $dst, $src", [(set SPR:$dst, (bitconvert GPR:$src))]>; def FMRRD : AVConv3I<0b11000101, 0b1011, (outs GPR:$dst1, GPR:$dst2), (ins DPR:$src), - IIC_fpALU, "fmrrd", " $dst1, $dst2, $src", + IIC_fpMOVDI, "fmrrd", " $dst1, $dst2, $src", [/* FIXME: Can't write pattern for multiple result instr*/]>; // FMDHR: GPR -> SPR @@ -242,7 +242,7 @@ def FMRRD : AVConv3I<0b11000101, 0b1011, def FMDRR : AVConv5I<0b11000100, 0b1011, (outs DPR:$dst), (ins GPR:$src1, GPR:$src2), - IIC_fpALU, "fmdrr", " $dst, $src1, $src2", + IIC_fpMOVID, "fmdrr", " $dst, $src1, $src2", [(set DPR:$dst, (arm_fmdrr GPR:$src1, GPR:$src2))]>; // FMRDH: SPR -> GPR @@ -258,23 +258,23 @@ def FMDRR : AVConv5I<0b11000100, 0b1011, // Int to FP: def FSITOD : AVConv1I<0b11101011, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a), - IIC_fpALU, "fsitod", " $dst, $a", + IIC_fpCVTID, "fsitod", " $dst, $a", [(set DPR:$dst, (arm_sitof SPR:$a))]> { let Inst{7} = 1; } def FSITOS : AVConv1In<0b11101011, 0b1000, 0b1010, (outs SPR:$dst),(ins SPR:$a), - IIC_fpALU, "fsitos", " $dst, $a", + IIC_fpCVTIS, "fsitos", " $dst, $a", [(set SPR:$dst, (arm_sitof SPR:$a))]> { let Inst{7} = 1; } def FUITOD : AVConv1I<0b11101011, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a), - IIC_fpALU, "fuitod", " $dst, $a", + IIC_fpCVTID, "fuitod", " $dst, $a", [(set DPR:$dst, (arm_uitof SPR:$a))]>; def FUITOS : AVConv1In<0b11101011, 0b1000, 0b1010, (outs SPR:$dst),(ins SPR:$a), - IIC_fpALU, "fuitos", " $dst, $a", + IIC_fpCVTIS, "fuitos", " $dst, $a", [(set SPR:$dst, (arm_uitof SPR:$a))]>; // FP to Int: @@ -282,28 +282,28 @@ def FUITOS : AVConv1In<0b11101011, 0b1000, 0b1010, (outs SPR:$dst),(ins SPR:$a), def FTOSIZD : AVConv1I<0b11101011, 0b1101, 0b1011, (outs SPR:$dst), (ins DPR:$a), - IIC_fpALU, "ftosizd", " $dst, $a", + IIC_fpCVTDI, "ftosizd", " $dst, $a", [(set SPR:$dst, (arm_ftosi DPR:$a))]> { let Inst{7} = 1; // Z bit } def FTOSIZS : AVConv1In<0b11101011, 0b1101, 0b1010, (outs SPR:$dst), (ins SPR:$a), - IIC_fpALU, "ftosizs", " $dst, $a", + IIC_fpCVTSI, "ftosizs", " $dst, $a", [(set SPR:$dst, (arm_ftosi SPR:$a))]> { let Inst{7} = 1; // Z bit } def FTOUIZD : AVConv1I<0b11101011, 0b1100, 0b1011, (outs SPR:$dst), (ins DPR:$a), - IIC_fpALU, "ftouizd", " $dst, $a", + IIC_fpCVTDI, "ftouizd", " $dst, $a", [(set SPR:$dst, (arm_ftoui DPR:$a))]> { let Inst{7} = 1; // Z bit } def FTOUIZS : AVConv1In<0b11101011, 0b1100, 0b1010, (outs SPR:$dst), (ins SPR:$a), - IIC_fpALU, "ftouizs", " $dst, $a", + IIC_fpCVTSI, "ftouizs", " $dst, $a", [(set SPR:$dst, (arm_ftoui SPR:$a))]> { let Inst{7} = 1; // Z bit } @@ -313,34 +313,34 @@ def FTOUIZS : AVConv1In<0b11101011, 0b1100, 0b1010, // def FMACD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), - IIC_fpALU, "fmacd", " $dst, $a, $b", + IIC_fpMAC64, "fmacd", " $dst, $a, $b", [(set DPR:$dst, (fadd (fmul DPR:$a, DPR:$b), DPR:$dstin))]>, RegConstraint<"$dstin = $dst">; def FMACS : ASbIn<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), - IIC_fpALU, "fmacs", " $dst, $a, $b", + IIC_fpMAC32, "fmacs", " $dst, $a, $b", [(set SPR:$dst, (fadd (fmul SPR:$a, SPR:$b), SPR:$dstin))]>, RegConstraint<"$dstin = $dst">; def FMSCD : ADbI<0b11100001, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), - IIC_fpALU, "fmscd", " $dst, $a, $b", + IIC_fpMAC64, "fmscd", " $dst, $a, $b", [(set DPR:$dst, (fsub (fmul DPR:$a, DPR:$b), DPR:$dstin))]>, RegConstraint<"$dstin = $dst">; def FMSCS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), - IIC_fpALU, "fmscs", " $dst, $a, $b", + IIC_fpMAC32, "fmscs", " $dst, $a, $b", [(set SPR:$dst, (fsub (fmul SPR:$a, SPR:$b), SPR:$dstin))]>, RegConstraint<"$dstin = $dst">; def FNMACD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), - IIC_fpALU, "fnmacd", " $dst, $a, $b", + IIC_fpMAC64, "fnmacd", " $dst, $a, $b", [(set DPR:$dst, (fadd (fneg (fmul DPR:$a, DPR:$b)), DPR:$dstin))]>, RegConstraint<"$dstin = $dst"> { let Inst{6} = 1; } def FNMACS : ASbIn<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), - IIC_fpALU, "fnmacs", " $dst, $a, $b", + IIC_fpMAC32, "fnmacs", " $dst, $a, $b", [(set SPR:$dst, (fadd (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>, RegConstraint<"$dstin = $dst"> { let Inst{6} = 1; @@ -352,14 +352,14 @@ def : Pat<(fsub SPR:$dstin, (fmul SPR:$a, SPR:$b)), (FNMACS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[DontUseNEONForFP]>; def FNMSCD : ADbI<0b11100001, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), - IIC_fpALU, "fnmscd", " $dst, $a, $b", + IIC_fpMAC64, "fnmscd", " $dst, $a, $b", [(set DPR:$dst, (fsub (fneg (fmul DPR:$a, DPR:$b)), DPR:$dstin))]>, RegConstraint<"$dstin = $dst"> { let Inst{6} = 1; } def FNMSCS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), - IIC_fpALU, "fnmscs", " $dst, $a, $b", + IIC_fpMAC32, "fnmscs", " $dst, $a, $b", [(set SPR:$dst, (fsub (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>, RegConstraint<"$dstin = $dst"> { let Inst{6} = 1; @@ -371,25 +371,25 @@ def FNMSCS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), def FCPYDcc : ADuI<0b11101011, 0b0000, 0b0100, (outs DPR:$dst), (ins DPR:$false, DPR:$true), - IIC_fpALU, "fcpyd", " $dst, $true", + IIC_fpUNA64, "fcpyd", " $dst, $true", [/*(set DPR:$dst, (ARMcmov DPR:$false, DPR:$true, imm:$cc))*/]>, RegConstraint<"$false = $dst">; def FCPYScc : ASuI<0b11101011, 0b0000, 0b0100, (outs SPR:$dst), (ins SPR:$false, SPR:$true), - IIC_fpALU, "fcpys", " $dst, $true", + IIC_fpUNA32, "fcpys", " $dst, $true", [/*(set SPR:$dst, (ARMcmov SPR:$false, SPR:$true, imm:$cc))*/]>, RegConstraint<"$false = $dst">; def FNEGDcc : ADuI<0b11101011, 0b0001, 0b0100, (outs DPR:$dst), (ins DPR:$false, DPR:$true), - IIC_fpALU, "fnegd", " $dst, $true", + IIC_fpUNA64, "fnegd", " $dst, $true", [/*(set DPR:$dst, (ARMcneg DPR:$false, DPR:$true, imm:$cc))*/]>, RegConstraint<"$false = $dst">; def FNEGScc : ASuI<0b11101011, 0b0001, 0b0100, (outs SPR:$dst), (ins SPR:$false, SPR:$true), - IIC_fpALU, "fnegs", " $dst, $true", + IIC_fpUNA32, "fnegs", " $dst, $true", [/*(set SPR:$dst, (ARMcneg SPR:$false, SPR:$true, imm:$cc))*/]>, RegConstraint<"$false = $dst">; @@ -399,7 +399,7 @@ def FNEGScc : ASuI<0b11101011, 0b0001, 0b0100, // let Defs = [CPSR], Uses = [FPSCR] in -def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpALU, "fmstat", "", [(arm_fmstat)]> { +def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "fmstat", "", [(arm_fmstat)]> { let Inst{27-20} = 0b11101111; let Inst{19-16} = 0b0001; let Inst{15-12} = 0b1111; diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td index c73c5b6fe00..b2df8e2f9a4 100644 --- a/lib/Target/ARM/ARMSchedule.td +++ b/lib/Target/ARM/ARMSchedule.td @@ -15,6 +15,8 @@ def FU_Pipe0 : FuncUnit; // pipeline 0 def FU_Pipe1 : FuncUnit; // pipeline 1 def FU_LdSt0 : FuncUnit; // pipeline 0 load/store def FU_LdSt1 : FuncUnit; // pipeline 1 load/store +def FU_NPipe : FuncUnit; // NEON ALU/MUL pipe +def FU_NLSPipe : FuncUnit; // NEON LS pipe //===----------------------------------------------------------------------===// // Instruction Itinerary classes used for ARM @@ -59,10 +61,37 @@ def IIC_iStoreiu : InstrItinClass; def IIC_iStoreru : InstrItinClass; def IIC_iStoresiu : InstrItinClass; def IIC_iStorem : InstrItinClass; -def IIC_fpALU : InstrItinClass; -def IIC_fpMPY : InstrItinClass; -def IIC_fpLoad : InstrItinClass; -def IIC_fpStore : InstrItinClass; +def IIC_fpSTAT : InstrItinClass; +def IIC_fpMOVIS : InstrItinClass; +def IIC_fpMOVID : InstrItinClass; +def IIC_fpMOVSI : InstrItinClass; +def IIC_fpMOVDI : InstrItinClass; +def IIC_fpUNA32 : InstrItinClass; +def IIC_fpUNA64 : InstrItinClass; +def IIC_fpCMP32 : InstrItinClass; +def IIC_fpCMP64 : InstrItinClass; +def IIC_fpCVTSD : InstrItinClass; +def IIC_fpCVTDS : InstrItinClass; +def IIC_fpCVTIS : InstrItinClass; +def IIC_fpCVTID : InstrItinClass; +def IIC_fpCVTSI : InstrItinClass; +def IIC_fpCVTDI : InstrItinClass; +def IIC_fpALU32 : InstrItinClass; +def IIC_fpALU64 : InstrItinClass; +def IIC_fpMUL32 : InstrItinClass; +def IIC_fpMUL64 : InstrItinClass; +def IIC_fpMAC32 : InstrItinClass; +def IIC_fpMAC64 : InstrItinClass; +def IIC_fpDIV32 : InstrItinClass; +def IIC_fpDIV64 : InstrItinClass; +def IIC_fpSQRT32 : InstrItinClass; +def IIC_fpSQRT64 : InstrItinClass; +def IIC_fpLoad32 : InstrItinClass; +def IIC_fpLoad64 : InstrItinClass; +def IIC_fpLoadm : InstrItinClass; +def IIC_fpStore32 : InstrItinClass; +def IIC_fpStore64 : InstrItinClass; +def IIC_fpStorem : InstrItinClass; def IIC_Br : InstrItinClass; //===----------------------------------------------------------------------===// @@ -116,12 +145,41 @@ def GenericItineraries : ProcessorItineraries<[ InstrItinData]>, InstrItinData]>, InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData, - InstrStage<1, [FU_LdSt0]>]>, - InstrItinData]> + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData, + InstrStage<1, [FU_LdSt0]>]>, + InstrItinData, + InstrStage<1, [FU_LdSt0]>]>, + InstrItinData, + InstrStage<1, [FU_LdSt0]>]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]> ]>; diff --git a/lib/Target/ARM/ARMScheduleV6.td b/lib/Target/ARM/ARMScheduleV6.td index 1cac9180df1..3eadf4cc2e5 100644 --- a/lib/Target/ARM/ARMScheduleV6.td +++ b/lib/Target/ARM/ARMScheduleV6.td @@ -61,10 +61,39 @@ def V6Itineraries : ProcessorItineraries<[ InstrItinData]>, InstrItinData]>, InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData, - InstrStage<1, [FU_LdSt0]>]>, - InstrItinData]> + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData, + InstrStage<1, [FU_LdSt0]>]>, + InstrItinData, + InstrStage<1, [FU_LdSt0]>]>, + InstrItinData, + InstrStage<1, [FU_LdSt0]>]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]> ]>; diff --git a/lib/Target/ARM/ARMScheduleV7.td b/lib/Target/ARM/ARMScheduleV7.td index bf5858171de..ead022ab24a 100644 --- a/lib/Target/ARM/ARMScheduleV7.td +++ b/lib/Target/ARM/ARMScheduleV7.td @@ -14,7 +14,7 @@ // // Scheduling information derived from "Cortex-A8 Technical Reference Manual". // -// Dual issue pipeline so every itinerary starts with FU_Pipe0 | FU_Pipe1 +// Dual issue pipeline represented by FU_Pipe0 | FU_Pipe1 // def CortexA8Itineraries : ProcessorItineraries<[ @@ -86,7 +86,7 @@ def CortexA8Itineraries : ProcessorItineraries<[ // Scaled register offset, issues over 2 cycles InstrItinData, InstrStage<1, [FU_Pipe0], 0>, - InstrStage<1, [FU_Pipe1], 0>, + InstrStage<1, [FU_Pipe1]>, InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_LdSt0]>], [4, 1, 1]>, // @@ -103,14 +103,14 @@ def CortexA8Itineraries : ProcessorItineraries<[ // Scaled register offset with update, issues over 2 cycles InstrItinData, InstrStage<1, [FU_Pipe0], 0>, - InstrStage<1, [FU_Pipe1], 0>, + InstrStage<1, [FU_Pipe1]>, InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_LdSt0]>], [4, 3, 1, 1]>, // // Load multiple InstrItinData, InstrStage<2, [FU_Pipe0], 0>, - InstrStage<2, [FU_Pipe1], 0>, + InstrStage<2, [FU_Pipe1]>, InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_LdSt0]>]>, @@ -120,16 +120,18 @@ def CortexA8Itineraries : ProcessorItineraries<[ // // Immediate offset InstrItinData, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [3, 1]>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_LdSt0]>], [3, 1]>, // // Register offset InstrItinData, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [3, 1, 1]>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_LdSt0]>], [3, 1, 1]>, // // Scaled register offset, issues over 2 cycles InstrItinData, InstrStage<1, [FU_Pipe0], 0>, - InstrStage<1, [FU_Pipe1], 0>, + InstrStage<1, [FU_Pipe1]>, InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_LdSt0]>], [3, 1, 1]>, // @@ -146,14 +148,14 @@ def CortexA8Itineraries : ProcessorItineraries<[ // Scaled register offset with update, issues over 2 cycles InstrItinData, InstrStage<1, [FU_Pipe0], 0>, - InstrStage<1, [FU_Pipe1], 0>, + InstrStage<1, [FU_Pipe1]>, InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_LdSt0]>], [3, 3, 1, 1]>, // // Store multiple InstrItinData, InstrStage<2, [FU_Pipe0], 0>, - InstrStage<2, [FU_Pipe1], 0>, + InstrStage<2, [FU_Pipe1]>, InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_LdSt0]>]>, @@ -162,20 +164,173 @@ def CortexA8Itineraries : ProcessorItineraries<[ // no delay slots, so the latency of a branch is unimportant InstrItinData]>, - // NFP ALU is not pipelined so stall all issues - InstrItinData, - InstrStage<7, [FU_Pipe1], 0>]>, - // VFP MPY is not pipelined so stall all issues - InstrItinData, - InstrStage<7, [FU_Pipe1], 0>]>, - // loads have an extra cycle of latency, but are fully pipelined + // VFP + // Issue through integer pipeline, and execute in NEON unit. We assume + // RunFast mode so that NFP pipeline is used for single-precision when + // possible. + // + // FP Special Register to Integer Register File Move + InstrItinData, + InstrStage<1, [FU_NLSPipe], 1>]>, + // + // Integer to Single-Precision FP Register File Move + InstrItinData, + InstrStage<1, [FU_NLSPipe], 1>]>, + // + // Integer to Double-Precision FP Register File Move + InstrItinData, + InstrStage<1, [FU_NLSPipe], 1>]>, + // + // Single-Precision FP to Integer Register File Move + InstrItinData, + InstrStage<1, [FU_NLSPipe], 1>], [20, 1]>, + // + // Double-Precision FP to Integer Register File Move + InstrItinData, + InstrStage<1, [FU_NLSPipe], 1>], [20, 20, 1]>, + // + // Single-precision FP Unary + InstrItinData, + InstrStage<1, [FU_NPipe]>], [7, 1]>, + // + // Double-precision FP Unary + InstrItinData, + InstrStage<4, [FU_NPipe], 0>, + InstrStage<4, [FU_NLSPipe]>]>, + // + // Single-precision FP Compare + InstrItinData, + InstrStage<1, [FU_NPipe]>], [7, 1]>, + // + // Double-precision FP Compare + InstrItinData, + InstrStage<4, [FU_NPipe], 0>, + InstrStage<4, [FU_NLSPipe]>]>, + // + // Single to Double FP Convert + InstrItinData, + InstrStage<7, [FU_NPipe], 0>, + InstrStage<7, [FU_NLSPipe]>]>, + // + // Double to Single FP Convert + InstrItinData, + InstrStage<5, [FU_NPipe], 0>, + InstrStage<5, [FU_NLSPipe]>]>, + // + // Single-Precision FP to Integer Convert + InstrItinData, + InstrStage<1, [FU_NPipe]>], [7, 1]>, + // + // Double-Precision FP to Integer Convert + InstrItinData, + InstrStage<8, [FU_NPipe], 0>, + InstrStage<8, [FU_NLSPipe]>]>, + // + // Integer to Single-Precision FP Convert + InstrItinData, + InstrStage<1, [FU_NPipe]>], [7, 1]>, + // + // Integer to Double-Precision FP Convert + InstrItinData, + InstrStage<8, [FU_NPipe], 0>, + InstrStage<8, [FU_NLSPipe]>]>, + // + // Single-precision FP ALU + InstrItinData, + InstrStage<1, [FU_NPipe]>], [7, 1]>, + // + // Double-precision FP ALU + InstrItinData, + InstrStage<9, [FU_NPipe], 0>, + InstrStage<9, [FU_NLSPipe]>]>, + // + // Single-precision FP Multiply + InstrItinData, + InstrStage<1, [FU_NPipe]>], [7, 1]>, + // + // Double-precision FP Multiply + InstrItinData, + InstrStage<11, [FU_NPipe], 0>, + InstrStage<11, [FU_NLSPipe]>]>, + // + // Single-precision FP MAC + InstrItinData, + InstrStage<1, [FU_NPipe]>], [7, 1]>, + // + // Double-precision FP MAC + InstrItinData, + InstrStage<19, [FU_NPipe], 0>, + InstrStage<19, [FU_NLSPipe]>]>, + // + // Single-precision FP DIV + InstrItinData, + InstrStage<20, [FU_NPipe], 0>, + InstrStage<20, [FU_NLSPipe]>]>, + // + // Double-precision FP DIV + InstrItinData, + InstrStage<29, [FU_NPipe], 0>, + InstrStage<29, [FU_NLSPipe]>]>, + // + // Single-precision FP SQRT + InstrItinData, + InstrStage<19, [FU_NPipe], 0>, + InstrStage<19, [FU_NLSPipe]>]>, + // + // Double-precision FP SQRT + InstrItinData, + InstrStage<29, [FU_NPipe], 0>, + InstrStage<29, [FU_NLSPipe]>]>, + // + // Single-precision FP Load // use FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData, + InstrItinData, InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>]>, + InstrStage<1, [FU_LdSt0], 0>, + InstrStage<1, [FU_NLSPipe]>]>, + // + // Double-precision FP Load // use FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>]> + InstrItinData, + InstrStage<1, [FU_Pipe0], 0>, + InstrStage<1, [FU_Pipe1]>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_LdSt0], 0>, + InstrStage<1, [FU_NLSPipe]>]>, + // + // FP Load Multiple + // use FU_Issue to enforce the 1 load/store per cycle limit + InstrItinData, + InstrStage<2, [FU_Pipe0], 0>, + InstrStage<2, [FU_Pipe1]>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_LdSt0], 0>, + InstrStage<1, [FU_NLSPipe]>]>, + // + // Single-precision FP Store + // use FU_Issue to enforce the 1 load/store per cycle limit + InstrItinData, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_LdSt0], 0>, + InstrStage<1, [FU_NLSPipe]>]>, + // + // Double-precision FP Store + // use FU_Issue to enforce the 1 load/store per cycle limit + InstrItinData, + InstrStage<1, [FU_Pipe0], 0>, + InstrStage<1, [FU_Pipe1]>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_LdSt0], 0>, + InstrStage<1, [FU_NLSPipe]>]>, + // + // FP Store Multiple + // use FU_Issue to enforce the 1 load/store per cycle limit + InstrItinData, + InstrStage<2, [FU_Pipe0], 0>, + InstrStage<2, [FU_Pipe1]>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_LdSt0], 0>, + InstrStage<1, [FU_NLSPipe]>]> ]>; // FIXME @@ -227,10 +382,39 @@ def CortexA9Itineraries : ProcessorItineraries<[ InstrItinData]>, InstrItinData]>, InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData, - InstrStage<1, [FU_LdSt0]>]>, - InstrItinData]> + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData, + InstrStage<1, [FU_LdSt0]>]>, + InstrItinData, + InstrStage<1, [FU_LdSt0]>]>, + InstrItinData, + InstrStage<1, [FU_LdSt0]>]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]> ]>;