mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-08-09 11:25:55 +00:00
[ARM64] Increases the Sched Model accuracy for Cortex-A53.
Patch by Dave Estes <cestes@codeaurora.org> http://reviews.llvm.org/D3769 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209001 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@@ -1101,7 +1101,7 @@ class BaseOneOperandData<bits<3> opc, RegisterClass regtype, string asm,
|
|||||||
SDPatternOperator node>
|
SDPatternOperator node>
|
||||||
: I<(outs regtype:$Rd), (ins regtype:$Rn), asm, "\t$Rd, $Rn", "",
|
: I<(outs regtype:$Rd), (ins regtype:$Rn), asm, "\t$Rd, $Rn", "",
|
||||||
[(set regtype:$Rd, (node regtype:$Rn))]>,
|
[(set regtype:$Rd, (node regtype:$Rn))]>,
|
||||||
Sched<[WriteI]> {
|
Sched<[WriteI, ReadI]> {
|
||||||
bits<5> Rd;
|
bits<5> Rd;
|
||||||
bits<5> Rn;
|
bits<5> Rn;
|
||||||
|
|
||||||
@@ -1140,7 +1140,7 @@ class BaseBaseAddSubCarry<bit isSub, RegisterClass regtype, string asm,
|
|||||||
list<dag> pattern>
|
list<dag> pattern>
|
||||||
: I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm),
|
: I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm),
|
||||||
asm, "\t$Rd, $Rn, $Rm", "", pattern>,
|
asm, "\t$Rd, $Rn, $Rm", "", pattern>,
|
||||||
Sched<[WriteI]> {
|
Sched<[WriteI, ReadI, ReadI]> {
|
||||||
let Uses = [NZCV];
|
let Uses = [NZCV];
|
||||||
bits<5> Rd;
|
bits<5> Rd;
|
||||||
bits<5> Rn;
|
bits<5> Rn;
|
||||||
@@ -1214,11 +1214,11 @@ class BaseDiv<bit isSigned, RegisterClass regtype, string asm,
|
|||||||
|
|
||||||
multiclass Div<bit isSigned, string asm, SDPatternOperator OpNode> {
|
multiclass Div<bit isSigned, string asm, SDPatternOperator OpNode> {
|
||||||
def Wr : BaseDiv<isSigned, GPR32, asm, OpNode>,
|
def Wr : BaseDiv<isSigned, GPR32, asm, OpNode>,
|
||||||
Sched<[WriteID32]> {
|
Sched<[WriteID32, ReadID, ReadID]> {
|
||||||
let Inst{31} = 0;
|
let Inst{31} = 0;
|
||||||
}
|
}
|
||||||
def Xr : BaseDiv<isSigned, GPR64, asm, OpNode>,
|
def Xr : BaseDiv<isSigned, GPR64, asm, OpNode>,
|
||||||
Sched<[WriteID64]> {
|
Sched<[WriteID64, ReadID, ReadID]> {
|
||||||
let Inst{31} = 1;
|
let Inst{31} = 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1226,7 +1226,7 @@ multiclass Div<bit isSigned, string asm, SDPatternOperator OpNode> {
|
|||||||
class BaseShift<bits<2> shift_type, RegisterClass regtype, string asm,
|
class BaseShift<bits<2> shift_type, RegisterClass regtype, string asm,
|
||||||
SDPatternOperator OpNode = null_frag>
|
SDPatternOperator OpNode = null_frag>
|
||||||
: BaseTwoOperand<{1,0,?,?}, regtype, asm, OpNode>,
|
: BaseTwoOperand<{1,0,?,?}, regtype, asm, OpNode>,
|
||||||
Sched<[WriteIS]> {
|
Sched<[WriteIS, ReadI]> {
|
||||||
let Inst{11-10} = shift_type;
|
let Inst{11-10} = shift_type;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1278,13 +1278,13 @@ class BaseMulAccum<bit isSub, bits<3> opc, RegisterClass multype,
|
|||||||
multiclass MulAccum<bit isSub, string asm, SDNode AccNode> {
|
multiclass MulAccum<bit isSub, string asm, SDNode AccNode> {
|
||||||
def Wrrr : BaseMulAccum<isSub, 0b000, GPR32, GPR32, asm,
|
def Wrrr : BaseMulAccum<isSub, 0b000, GPR32, GPR32, asm,
|
||||||
[(set GPR32:$Rd, (AccNode GPR32:$Ra, (mul GPR32:$Rn, GPR32:$Rm)))]>,
|
[(set GPR32:$Rd, (AccNode GPR32:$Ra, (mul GPR32:$Rn, GPR32:$Rm)))]>,
|
||||||
Sched<[WriteIM32]> {
|
Sched<[WriteIM32, ReadIMA, ReadIM, ReadIM]> {
|
||||||
let Inst{31} = 0;
|
let Inst{31} = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
def Xrrr : BaseMulAccum<isSub, 0b000, GPR64, GPR64, asm,
|
def Xrrr : BaseMulAccum<isSub, 0b000, GPR64, GPR64, asm,
|
||||||
[(set GPR64:$Rd, (AccNode GPR64:$Ra, (mul GPR64:$Rn, GPR64:$Rm)))]>,
|
[(set GPR64:$Rd, (AccNode GPR64:$Ra, (mul GPR64:$Rn, GPR64:$Rm)))]>,
|
||||||
Sched<[WriteIM64]> {
|
Sched<[WriteIM64, ReadIMA, ReadIM, ReadIM]> {
|
||||||
let Inst{31} = 1;
|
let Inst{31} = 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1294,7 +1294,7 @@ class WideMulAccum<bit isSub, bits<3> opc, string asm,
|
|||||||
: BaseMulAccum<isSub, opc, GPR32, GPR64, asm,
|
: BaseMulAccum<isSub, opc, GPR32, GPR64, asm,
|
||||||
[(set GPR64:$Rd, (AccNode GPR64:$Ra,
|
[(set GPR64:$Rd, (AccNode GPR64:$Ra,
|
||||||
(mul (ExtNode GPR32:$Rn), (ExtNode GPR32:$Rm))))]>,
|
(mul (ExtNode GPR32:$Rn), (ExtNode GPR32:$Rm))))]>,
|
||||||
Sched<[WriteIM32]> {
|
Sched<[WriteIM32, ReadIMA, ReadIM, ReadIM]> {
|
||||||
let Inst{31} = 1;
|
let Inst{31} = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1302,7 +1302,7 @@ class MulHi<bits<3> opc, string asm, SDNode OpNode>
|
|||||||
: I<(outs GPR64:$Rd), (ins GPR64:$Rn, GPR64:$Rm),
|
: I<(outs GPR64:$Rd), (ins GPR64:$Rn, GPR64:$Rm),
|
||||||
asm, "\t$Rd, $Rn, $Rm", "",
|
asm, "\t$Rd, $Rn, $Rm", "",
|
||||||
[(set GPR64:$Rd, (OpNode GPR64:$Rn, GPR64:$Rm))]>,
|
[(set GPR64:$Rd, (OpNode GPR64:$Rn, GPR64:$Rm))]>,
|
||||||
Sched<[WriteIM64]> {
|
Sched<[WriteIM64, ReadIM, ReadIM]> {
|
||||||
bits<5> Rd;
|
bits<5> Rd;
|
||||||
bits<5> Rn;
|
bits<5> Rn;
|
||||||
bits<5> Rm;
|
bits<5> Rm;
|
||||||
@@ -1333,7 +1333,7 @@ class BaseCRC32<bit sf, bits<2> sz, bit C, RegisterClass StreamReg,
|
|||||||
: I<(outs GPR32:$Rd), (ins GPR32:$Rn, StreamReg:$Rm),
|
: I<(outs GPR32:$Rd), (ins GPR32:$Rn, StreamReg:$Rm),
|
||||||
asm, "\t$Rd, $Rn, $Rm", "",
|
asm, "\t$Rd, $Rn, $Rm", "",
|
||||||
[(set GPR32:$Rd, (OpNode GPR32:$Rn, StreamReg:$Rm))]>,
|
[(set GPR32:$Rd, (OpNode GPR32:$Rn, StreamReg:$Rm))]>,
|
||||||
Sched<[WriteISReg]> {
|
Sched<[WriteISReg, ReadI, ReadISReg]> {
|
||||||
bits<5> Rd;
|
bits<5> Rd;
|
||||||
bits<5> Rn;
|
bits<5> Rn;
|
||||||
bits<5> Rm;
|
bits<5> Rm;
|
||||||
@@ -1420,7 +1420,7 @@ class BaseInsertImmediate<bits<2> opc, RegisterClass regtype, Operand shifter,
|
|||||||
: I<(outs regtype:$Rd),
|
: I<(outs regtype:$Rd),
|
||||||
(ins regtype:$src, movimm32_imm:$imm, shifter:$shift),
|
(ins regtype:$src, movimm32_imm:$imm, shifter:$shift),
|
||||||
asm, "\t$Rd, $imm$shift", "$src = $Rd", []>,
|
asm, "\t$Rd, $imm$shift", "$src = $Rd", []>,
|
||||||
Sched<[WriteI]> {
|
Sched<[WriteI, ReadI]> {
|
||||||
bits<5> Rd;
|
bits<5> Rd;
|
||||||
bits<16> imm;
|
bits<16> imm;
|
||||||
bits<6> shift;
|
bits<6> shift;
|
||||||
@@ -1453,7 +1453,7 @@ class BaseAddSubImm<bit isSub, bit setFlags, RegisterClass dstRegtype,
|
|||||||
: I<(outs dstRegtype:$Rd), (ins srcRegtype:$Rn, immtype:$imm),
|
: I<(outs dstRegtype:$Rd), (ins srcRegtype:$Rn, immtype:$imm),
|
||||||
asm, "\t$Rd, $Rn, $imm", "",
|
asm, "\t$Rd, $Rn, $imm", "",
|
||||||
[(set dstRegtype:$Rd, (OpNode srcRegtype:$Rn, immtype:$imm))]>,
|
[(set dstRegtype:$Rd, (OpNode srcRegtype:$Rn, immtype:$imm))]>,
|
||||||
Sched<[WriteI]> {
|
Sched<[WriteI, ReadI]> {
|
||||||
bits<5> Rd;
|
bits<5> Rd;
|
||||||
bits<5> Rn;
|
bits<5> Rn;
|
||||||
bits<14> imm;
|
bits<14> imm;
|
||||||
@@ -1471,7 +1471,7 @@ class BaseAddSubRegPseudo<RegisterClass regtype,
|
|||||||
SDPatternOperator OpNode>
|
SDPatternOperator OpNode>
|
||||||
: Pseudo<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm),
|
: Pseudo<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm),
|
||||||
[(set regtype:$Rd, (OpNode regtype:$Rn, regtype:$Rm))]>,
|
[(set regtype:$Rd, (OpNode regtype:$Rn, regtype:$Rm))]>,
|
||||||
Sched<[WriteI]>;
|
Sched<[WriteI, ReadI, ReadI]>;
|
||||||
|
|
||||||
class BaseAddSubSReg<bit isSub, bit setFlags, RegisterClass regtype,
|
class BaseAddSubSReg<bit isSub, bit setFlags, RegisterClass regtype,
|
||||||
arith_shifted_reg shifted_regtype, string asm,
|
arith_shifted_reg shifted_regtype, string asm,
|
||||||
@@ -1479,7 +1479,7 @@ class BaseAddSubSReg<bit isSub, bit setFlags, RegisterClass regtype,
|
|||||||
: I<(outs regtype:$Rd), (ins regtype:$Rn, shifted_regtype:$Rm),
|
: I<(outs regtype:$Rd), (ins regtype:$Rn, shifted_regtype:$Rm),
|
||||||
asm, "\t$Rd, $Rn, $Rm", "",
|
asm, "\t$Rd, $Rn, $Rm", "",
|
||||||
[(set regtype:$Rd, (OpNode regtype:$Rn, shifted_regtype:$Rm))]>,
|
[(set regtype:$Rd, (OpNode regtype:$Rn, shifted_regtype:$Rm))]>,
|
||||||
Sched<[WriteISReg]> {
|
Sched<[WriteISReg, ReadI, ReadISReg]> {
|
||||||
// The operands are in order to match the 'addr' MI operands, so we
|
// The operands are in order to match the 'addr' MI operands, so we
|
||||||
// don't need an encoder method and by-name matching. Just use the default
|
// don't need an encoder method and by-name matching. Just use the default
|
||||||
// in-order handling. Since we're using by-order, make sure the names
|
// in-order handling. Since we're using by-order, make sure the names
|
||||||
@@ -1508,7 +1508,7 @@ class BaseAddSubEReg<bit isSub, bit setFlags, RegisterClass dstRegtype,
|
|||||||
(ins src1Regtype:$R2, src2Regtype:$R3),
|
(ins src1Regtype:$R2, src2Regtype:$R3),
|
||||||
asm, "\t$R1, $R2, $R3", "",
|
asm, "\t$R1, $R2, $R3", "",
|
||||||
[(set dstRegtype:$R1, (OpNode src1Regtype:$R2, src2Regtype:$R3))]>,
|
[(set dstRegtype:$R1, (OpNode src1Regtype:$R2, src2Regtype:$R3))]>,
|
||||||
Sched<[WriteIEReg]> {
|
Sched<[WriteIEReg, ReadI, ReadIEReg]> {
|
||||||
bits<5> Rd;
|
bits<5> Rd;
|
||||||
bits<5> Rn;
|
bits<5> Rn;
|
||||||
bits<5> Rm;
|
bits<5> Rm;
|
||||||
@@ -1533,7 +1533,7 @@ class BaseAddSubEReg64<bit isSub, bit setFlags, RegisterClass dstRegtype,
|
|||||||
: I<(outs dstRegtype:$Rd),
|
: I<(outs dstRegtype:$Rd),
|
||||||
(ins src1Regtype:$Rn, src2Regtype:$Rm, ext_op:$ext),
|
(ins src1Regtype:$Rn, src2Regtype:$Rm, ext_op:$ext),
|
||||||
asm, "\t$Rd, $Rn, $Rm$ext", "", []>,
|
asm, "\t$Rd, $Rn, $Rm$ext", "", []>,
|
||||||
Sched<[WriteIEReg]> {
|
Sched<[WriteIEReg, ReadI, ReadIEReg]> {
|
||||||
bits<5> Rd;
|
bits<5> Rd;
|
||||||
bits<5> Rn;
|
bits<5> Rn;
|
||||||
bits<5> Rm;
|
bits<5> Rm;
|
||||||
@@ -1746,7 +1746,7 @@ class BaseBitfieldImm<bits<2> opc,
|
|||||||
RegisterClass regtype, Operand imm_type, string asm>
|
RegisterClass regtype, Operand imm_type, string asm>
|
||||||
: I<(outs regtype:$Rd), (ins regtype:$Rn, imm_type:$immr, imm_type:$imms),
|
: I<(outs regtype:$Rd), (ins regtype:$Rn, imm_type:$immr, imm_type:$imms),
|
||||||
asm, "\t$Rd, $Rn, $immr, $imms", "", []>,
|
asm, "\t$Rd, $Rn, $immr, $imms", "", []>,
|
||||||
Sched<[WriteIS]> {
|
Sched<[WriteIS, ReadI]> {
|
||||||
bits<5> Rd;
|
bits<5> Rd;
|
||||||
bits<5> Rn;
|
bits<5> Rn;
|
||||||
bits<6> immr;
|
bits<6> immr;
|
||||||
@@ -1780,7 +1780,7 @@ class BaseBitfieldImmWith2RegArgs<bits<2> opc,
|
|||||||
: I<(outs regtype:$Rd), (ins regtype:$src, regtype:$Rn, imm_type:$immr,
|
: I<(outs regtype:$Rd), (ins regtype:$src, regtype:$Rn, imm_type:$immr,
|
||||||
imm_type:$imms),
|
imm_type:$imms),
|
||||||
asm, "\t$Rd, $Rn, $immr, $imms", "$src = $Rd", []>,
|
asm, "\t$Rd, $Rn, $immr, $imms", "$src = $Rd", []>,
|
||||||
Sched<[WriteIS]> {
|
Sched<[WriteIS, ReadI]> {
|
||||||
bits<5> Rd;
|
bits<5> Rd;
|
||||||
bits<5> Rn;
|
bits<5> Rn;
|
||||||
bits<6> immr;
|
bits<6> immr;
|
||||||
@@ -1818,7 +1818,7 @@ class BaseLogicalImm<bits<2> opc, RegisterClass dregtype,
|
|||||||
list<dag> pattern>
|
list<dag> pattern>
|
||||||
: I<(outs dregtype:$Rd), (ins sregtype:$Rn, imm_type:$imm),
|
: I<(outs dregtype:$Rd), (ins sregtype:$Rn, imm_type:$imm),
|
||||||
asm, "\t$Rd, $Rn, $imm", "", pattern>,
|
asm, "\t$Rd, $Rn, $imm", "", pattern>,
|
||||||
Sched<[WriteI]> {
|
Sched<[WriteI, ReadI]> {
|
||||||
bits<5> Rd;
|
bits<5> Rd;
|
||||||
bits<5> Rn;
|
bits<5> Rn;
|
||||||
bits<13> imm;
|
bits<13> imm;
|
||||||
@@ -1839,7 +1839,7 @@ class BaseLogicalSReg<bits<2> opc, bit N, RegisterClass regtype,
|
|||||||
list<dag> pattern>
|
list<dag> pattern>
|
||||||
: I<(outs regtype:$Rd), (ins regtype:$Rn, shifted_regtype:$Rm),
|
: I<(outs regtype:$Rd), (ins regtype:$Rn, shifted_regtype:$Rm),
|
||||||
asm, "\t$Rd, $Rn, $Rm", "", pattern>,
|
asm, "\t$Rd, $Rn, $Rm", "", pattern>,
|
||||||
Sched<[WriteISReg]> {
|
Sched<[WriteISReg, ReadI, ReadISReg]> {
|
||||||
// The operands are in order to match the 'addr' MI operands, so we
|
// The operands are in order to match the 'addr' MI operands, so we
|
||||||
// don't need an encoder method and by-name matching. Just use the default
|
// don't need an encoder method and by-name matching. Just use the default
|
||||||
// in-order handling. Since we're using by-order, make sure the names
|
// in-order handling. Since we're using by-order, make sure the names
|
||||||
@@ -1897,7 +1897,7 @@ multiclass LogicalImmS<bits<2> opc, string mnemonic, SDNode OpNode> {
|
|||||||
class BaseLogicalRegPseudo<RegisterClass regtype, SDPatternOperator OpNode>
|
class BaseLogicalRegPseudo<RegisterClass regtype, SDPatternOperator OpNode>
|
||||||
: Pseudo<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm),
|
: Pseudo<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm),
|
||||||
[(set regtype:$Rd, (OpNode regtype:$Rn, regtype:$Rm))]>,
|
[(set regtype:$Rd, (OpNode regtype:$Rn, regtype:$Rm))]>,
|
||||||
Sched<[WriteI]>;
|
Sched<[WriteI, ReadI, ReadI]>;
|
||||||
|
|
||||||
// Split from LogicalImm as not all instructions have both.
|
// Split from LogicalImm as not all instructions have both.
|
||||||
multiclass LogicalReg<bits<2> opc, bit N, string mnemonic,
|
multiclass LogicalReg<bits<2> opc, bit N, string mnemonic,
|
||||||
@@ -1953,7 +1953,7 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
|
|||||||
class BaseCondSetFlagsImm<bit op, RegisterClass regtype, string asm>
|
class BaseCondSetFlagsImm<bit op, RegisterClass regtype, string asm>
|
||||||
: I<(outs), (ins regtype:$Rn, imm0_31:$imm, imm0_15:$nzcv, ccode:$cond),
|
: I<(outs), (ins regtype:$Rn, imm0_31:$imm, imm0_15:$nzcv, ccode:$cond),
|
||||||
asm, "\t$Rn, $imm, $nzcv, $cond", "", []>,
|
asm, "\t$Rn, $imm, $nzcv, $cond", "", []>,
|
||||||
Sched<[WriteI]> {
|
Sched<[WriteI, ReadI]> {
|
||||||
let Uses = [NZCV];
|
let Uses = [NZCV];
|
||||||
let Defs = [NZCV];
|
let Defs = [NZCV];
|
||||||
|
|
||||||
@@ -1985,7 +1985,7 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
|
|||||||
class BaseCondSetFlagsReg<bit op, RegisterClass regtype, string asm>
|
class BaseCondSetFlagsReg<bit op, RegisterClass regtype, string asm>
|
||||||
: I<(outs), (ins regtype:$Rn, regtype:$Rm, imm0_15:$nzcv, ccode:$cond),
|
: I<(outs), (ins regtype:$Rn, regtype:$Rm, imm0_15:$nzcv, ccode:$cond),
|
||||||
asm, "\t$Rn, $Rm, $nzcv, $cond", "", []>,
|
asm, "\t$Rn, $Rm, $nzcv, $cond", "", []>,
|
||||||
Sched<[WriteI]> {
|
Sched<[WriteI, ReadI, ReadI]> {
|
||||||
let Uses = [NZCV];
|
let Uses = [NZCV];
|
||||||
let Defs = [NZCV];
|
let Defs = [NZCV];
|
||||||
|
|
||||||
@@ -2022,7 +2022,7 @@ class BaseCondSelect<bit op, bits<2> op2, RegisterClass regtype, string asm>
|
|||||||
asm, "\t$Rd, $Rn, $Rm, $cond", "",
|
asm, "\t$Rd, $Rn, $Rm, $cond", "",
|
||||||
[(set regtype:$Rd,
|
[(set regtype:$Rd,
|
||||||
(ARM64csel regtype:$Rn, regtype:$Rm, (i32 imm:$cond), NZCV))]>,
|
(ARM64csel regtype:$Rn, regtype:$Rm, (i32 imm:$cond), NZCV))]>,
|
||||||
Sched<[WriteI]> {
|
Sched<[WriteI, ReadI, ReadI]> {
|
||||||
let Uses = [NZCV];
|
let Uses = [NZCV];
|
||||||
|
|
||||||
bits<5> Rd;
|
bits<5> Rd;
|
||||||
@@ -2055,7 +2055,7 @@ class BaseCondSelectOp<bit op, bits<2> op2, RegisterClass regtype, string asm,
|
|||||||
[(set regtype:$Rd,
|
[(set regtype:$Rd,
|
||||||
(ARM64csel regtype:$Rn, (frag regtype:$Rm),
|
(ARM64csel regtype:$Rn, (frag regtype:$Rm),
|
||||||
(i32 imm:$cond), NZCV))]>,
|
(i32 imm:$cond), NZCV))]>,
|
||||||
Sched<[WriteI]> {
|
Sched<[WriteI, ReadI, ReadI]> {
|
||||||
let Uses = [NZCV];
|
let Uses = [NZCV];
|
||||||
|
|
||||||
bits<5> Rd;
|
bits<5> Rd;
|
||||||
|
@@ -825,6 +825,19 @@ bool ARM64InstrInfo::optimizeCompareInstr(
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Return true if this instruction has a non-zero immediate
|
||||||
|
bool ARM64InstrInfo::hasNonZeroImm(const MachineInstr *MI) const {
|
||||||
|
switch (MI->getOpcode()) {
|
||||||
|
default:
|
||||||
|
if (MI->getOperand(3).isImm()) {
|
||||||
|
unsigned val = MI->getOperand(3).getImm();
|
||||||
|
return (val != 0);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
// Return true if this instruction simply sets its single destination register
|
// Return true if this instruction simply sets its single destination register
|
||||||
// to zero. This is equivalent to a register rename of the zero-register.
|
// to zero. This is equivalent to a register rename of the zero-register.
|
||||||
bool ARM64InstrInfo::isGPRZero(const MachineInstr *MI) const {
|
bool ARM64InstrInfo::isGPRZero(const MachineInstr *MI) const {
|
||||||
|
@@ -56,6 +56,9 @@ public:
|
|||||||
unsigned isStoreToStackSlot(const MachineInstr *MI,
|
unsigned isStoreToStackSlot(const MachineInstr *MI,
|
||||||
int &FrameIndex) const override;
|
int &FrameIndex) const override;
|
||||||
|
|
||||||
|
/// \brief Is there a non-zero immediate?
|
||||||
|
bool hasNonZeroImm(const MachineInstr *MI) const;
|
||||||
|
|
||||||
/// \brief Does this instruction set its full destination register to zero?
|
/// \brief Does this instruction set its full destination register to zero?
|
||||||
bool isGPRZero(const MachineInstr *MI) const;
|
bool isGPRZero(const MachineInstr *MI) const;
|
||||||
|
|
||||||
|
@@ -20,7 +20,7 @@ def CortexA53Model : SchedMachineModel {
|
|||||||
let MicroOpBufferSize = 0; // Explicitly set to zero since A53 is in-order.
|
let MicroOpBufferSize = 0; // Explicitly set to zero since A53 is in-order.
|
||||||
let IssueWidth = 2; // 2 micro-ops are dispatched per cycle.
|
let IssueWidth = 2; // 2 micro-ops are dispatched per cycle.
|
||||||
let MinLatency = 1 ; // OperandCycles are interpreted as MinLatency.
|
let MinLatency = 1 ; // OperandCycles are interpreted as MinLatency.
|
||||||
let LoadLatency = 2; // Optimistic load latency assuming bypass.
|
let LoadLatency = 3; // Optimistic load latency assuming bypass.
|
||||||
// This is overridden by OperandCycles if the
|
// This is overridden by OperandCycles if the
|
||||||
// Itineraries are queried instead.
|
// Itineraries are queried instead.
|
||||||
let MispredictPenalty = 9; // Based on "Cortex-A53 Software Optimisation
|
let MispredictPenalty = 9; // Based on "Cortex-A53 Software Optimisation
|
||||||
@@ -32,7 +32,7 @@ def CortexA53Model : SchedMachineModel {
|
|||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// Define each kind of processor resource and number available.
|
// Define each kind of processor resource and number available.
|
||||||
|
|
||||||
// Modeling each pipeline as a ProcResource using the BufferSize = 0 since
|
// Modeling each pipeline as a ProcResource using the BufferSize = 0 since
|
||||||
// Cortex-A53 is in-order.
|
// Cortex-A53 is in-order.
|
||||||
|
|
||||||
def A53UnitALU : ProcResource<2> { let BufferSize = 0; } // Int ALU
|
def A53UnitALU : ProcResource<2> { let BufferSize = 0; } // Int ALU
|
||||||
@@ -50,16 +50,16 @@ def A53UnitFPMDS : ProcResource<1> { let BufferSize = 0; } // FP Mult/Div/Sqrt
|
|||||||
|
|
||||||
let SchedModel = CortexA53Model in {
|
let SchedModel = CortexA53Model in {
|
||||||
|
|
||||||
// ALU - These are reduced to 1 despite a true latency of 4 in order to easily
|
// ALU - Despite having a full latency of 4, most of the ALU instructions can
|
||||||
// model forwarding logic. Once forwarding is properly modelled, then
|
// forward a cycle earlier and then two cycles earlier in the case of a
|
||||||
// they'll be corrected.
|
// shift-only instruction. These latencies will be incorrect when the
|
||||||
def : WriteRes<WriteImm, [A53UnitALU]> { let Latency = 1; }
|
// result cannot be forwarded, but modeling isn't rocket surgery.
|
||||||
def : WriteRes<WriteI, [A53UnitALU]> { let Latency = 1; }
|
def : WriteRes<WriteImm, [A53UnitALU]> { let Latency = 3; }
|
||||||
def : WriteRes<WriteISReg, [A53UnitALU]> { let Latency = 1; }
|
def : WriteRes<WriteI, [A53UnitALU]> { let Latency = 3; }
|
||||||
def : WriteRes<WriteIEReg, [A53UnitALU]> { let Latency = 1; }
|
def : WriteRes<WriteISReg, [A53UnitALU]> { let Latency = 3; }
|
||||||
def : WriteRes<WriteExtr, [A53UnitALU]> { let Latency = 1; }
|
def : WriteRes<WriteIEReg, [A53UnitALU]> { let Latency = 3; }
|
||||||
def : WriteRes<WriteIS, [A53UnitALU]> { let Latency = 1; }
|
def : WriteRes<WriteIS, [A53UnitALU]> { let Latency = 2; }
|
||||||
def : WriteRes<WriteAdr, [A53UnitALU]> { let Latency = 1; }
|
def : WriteRes<WriteExtr, [A53UnitALU]> { let Latency = 3; }
|
||||||
|
|
||||||
// MAC
|
// MAC
|
||||||
def : WriteRes<WriteIM32, [A53UnitMAC]> { let Latency = 4; }
|
def : WriteRes<WriteIM32, [A53UnitMAC]> { let Latency = 4; }
|
||||||
@@ -73,14 +73,41 @@ def : WriteRes<WriteID64, [A53UnitDiv]> { let Latency = 4; }
|
|||||||
def : WriteRes<WriteLD, [A53UnitLdSt]> { let Latency = 4; }
|
def : WriteRes<WriteLD, [A53UnitLdSt]> { let Latency = 4; }
|
||||||
def : WriteRes<WriteLDIdx, [A53UnitLdSt]> { let Latency = 4; }
|
def : WriteRes<WriteLDIdx, [A53UnitLdSt]> { let Latency = 4; }
|
||||||
def : WriteRes<WriteLDHi, [A53UnitLdSt]> { let Latency = 4; }
|
def : WriteRes<WriteLDHi, [A53UnitLdSt]> { let Latency = 4; }
|
||||||
def : WriteRes<WriteVLD, [A53UnitLdSt]> { let Latency = 4; }
|
|
||||||
|
// Vector Load - Vector loads take 1-5 cycles to issue. For the WriteVLD
|
||||||
|
// below, choosing the median of 3 which makes the latency 6.
|
||||||
|
// May model this more carefully in the future. The remaining
|
||||||
|
// A53WriteVLD# types represent the 1-5 cycle issues explicitly.
|
||||||
|
def : WriteRes<WriteVLD, [A53UnitLdSt]> { let Latency = 6;
|
||||||
|
let ResourceCycles = [3]; }
|
||||||
|
def A53WriteVLD1 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 4; }
|
||||||
|
def A53WriteVLD2 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 5;
|
||||||
|
let ResourceCycles = [2]; }
|
||||||
|
def A53WriteVLD3 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 6;
|
||||||
|
let ResourceCycles = [3]; }
|
||||||
|
def A53WriteVLD4 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 7;
|
||||||
|
let ResourceCycles = [4]; }
|
||||||
|
def A53WriteVLD5 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 8;
|
||||||
|
let ResourceCycles = [5]; }
|
||||||
|
|
||||||
|
// Pre/Post Indexing - Performed as part of address generation which is already
|
||||||
|
// accounted for in the WriteST* latencies below
|
||||||
|
def : WriteRes<WriteAdr, []> { let Latency = 0; }
|
||||||
|
|
||||||
// Store
|
// Store
|
||||||
def : WriteRes<WriteST, [A53UnitLdSt]> { let Latency = 4; }
|
def : WriteRes<WriteST, [A53UnitLdSt]> { let Latency = 4; }
|
||||||
def : WriteRes<WriteSTP, [A53UnitLdSt]> { let Latency = 4; }
|
def : WriteRes<WriteSTP, [A53UnitLdSt]> { let Latency = 4; }
|
||||||
def : WriteRes<WriteSTIdx, [A53UnitLdSt]> { let Latency = 4; }
|
def : WriteRes<WriteSTIdx, [A53UnitLdSt]> { let Latency = 4; }
|
||||||
def : WriteRes<WriteSTX, [A53UnitLdSt]> { let Latency = 4; }
|
def : WriteRes<WriteSTX, [A53UnitLdSt]> { let Latency = 4; }
|
||||||
def : WriteRes<WriteVST, [A53UnitLdSt]> { let Latency = 4; }
|
|
||||||
|
// Vector Store - Similar to vector loads, can take 1-3 cycles to issue.
|
||||||
|
def : WriteRes<WriteVST, [A53UnitLdSt]> { let Latency = 5;
|
||||||
|
let ResourceCycles = [2];}
|
||||||
|
def A53WriteVST1 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 4; }
|
||||||
|
def A53WriteVST2 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 5;
|
||||||
|
let ResourceCycles = [2]; }
|
||||||
|
def A53WriteVST3 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 6;
|
||||||
|
let ResourceCycles = [3]; }
|
||||||
|
|
||||||
// Branch
|
// Branch
|
||||||
def : WriteRes<WriteBr, [A53UnitB]>;
|
def : WriteRes<WriteBr, [A53UnitB]>;
|
||||||
@@ -101,29 +128,143 @@ def : WriteRes<WriteV, [A53UnitFPALU]> { let Latency = 6; }
|
|||||||
def : WriteRes<WriteFMul, [A53UnitFPMDS]> { let Latency = 6; }
|
def : WriteRes<WriteFMul, [A53UnitFPMDS]> { let Latency = 6; }
|
||||||
def : WriteRes<WriteFDiv, [A53UnitFPMDS]> { let Latency = 33;
|
def : WriteRes<WriteFDiv, [A53UnitFPMDS]> { let Latency = 33;
|
||||||
let ResourceCycles = [29]; }
|
let ResourceCycles = [29]; }
|
||||||
def A53WriteFDiv : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 33;
|
def A53WriteFMAC : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 10; }
|
||||||
let ResourceCycles = [29]; }
|
def A53WriteFDivSP : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 18;
|
||||||
def A53WriteFSqrt : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 32;
|
let ResourceCycles = [14]; }
|
||||||
let ResourceCycles = [28]; }
|
def A53WriteFDivDP : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 33;
|
||||||
|
let ResourceCycles = [29]; }
|
||||||
|
def A53WriteFSqrtSP : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 17;
|
||||||
|
let ResourceCycles = [13]; }
|
||||||
|
def A53WriteFSqrtDP : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 32;
|
||||||
|
let ResourceCycles = [28]; }
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// Subtarget-specific SchedRead types.
|
// Subtarget-specific SchedRead types.
|
||||||
|
|
||||||
// While there is no forwarding information defined for these SchedRead types,
|
// No forwarding for these reads.
|
||||||
// they are still used by some instruction via a SchedRW list and so these zero
|
|
||||||
// SchedReadAdvances are required.
|
|
||||||
|
|
||||||
def : ReadAdvance<ReadExtrHi, 0>;
|
def : ReadAdvance<ReadExtrHi, 0>;
|
||||||
def : ReadAdvance<ReadAdrBase, 0>;
|
def : ReadAdvance<ReadAdrBase, 0>;
|
||||||
def : ReadAdvance<ReadVLD, 0>;
|
def : ReadAdvance<ReadVLD, 0>;
|
||||||
|
|
||||||
|
// ALU - Most operands in the ALU pipes are not needed for two cycles. Shiftable
|
||||||
|
// operands are needed one cycle later if and only if they are to be
|
||||||
|
// shifted. Otherwise, they too are needed two cycles later.
|
||||||
|
def : ReadAdvance<ReadI, 2, [WriteImm,WriteI,
|
||||||
|
WriteISReg, WriteIEReg,WriteIS,
|
||||||
|
WriteID32,WriteID64,
|
||||||
|
WriteIM32,WriteIM64]>;
|
||||||
|
def A53ReadShifted : SchedReadAdvance<1, [WriteImm,WriteI,
|
||||||
|
WriteISReg, WriteIEReg,WriteIS,
|
||||||
|
WriteID32,WriteID64,
|
||||||
|
WriteIM32,WriteIM64]>;
|
||||||
|
def A53ReadNotShifted : SchedReadAdvance<2, [WriteImm,WriteI,
|
||||||
|
WriteISReg, WriteIEReg,WriteIS,
|
||||||
|
WriteID32,WriteID64,
|
||||||
|
WriteIM32,WriteIM64]>;
|
||||||
|
def A53ReadISReg : SchedReadVariant<[
|
||||||
|
SchedVar<RegShiftedPred, [A53ReadShifted]>,
|
||||||
|
SchedVar<NoSchedPred, [A53ReadNotShifted]>]>;
|
||||||
|
def : SchedAlias<ReadISReg, A53ReadISReg>;
|
||||||
|
|
||||||
|
def A53ReadIEReg : SchedReadVariant<[
|
||||||
|
SchedVar<RegShiftedPred, [A53ReadShifted]>,
|
||||||
|
SchedVar<NoSchedPred, [A53ReadNotShifted]>]>;
|
||||||
|
def : SchedAlias<ReadIEReg, A53ReadIEReg>;
|
||||||
|
|
||||||
|
// MAC - Operands are generally needed one cycle later in the MAC pipe.
|
||||||
|
// Accumulator operands are needed two cycles later.
|
||||||
|
def : ReadAdvance<ReadIM, 1, [WriteImm,WriteI,
|
||||||
|
WriteISReg, WriteIEReg,WriteIS,
|
||||||
|
WriteID32,WriteID64,
|
||||||
|
WriteIM32,WriteIM64]>;
|
||||||
|
def : ReadAdvance<ReadIMA, 2, [WriteImm,WriteI,
|
||||||
|
WriteISReg, WriteIEReg,WriteIS,
|
||||||
|
WriteID32,WriteID64,
|
||||||
|
WriteIM32,WriteIM64]>;
|
||||||
|
|
||||||
|
// Div
|
||||||
|
def : ReadAdvance<ReadID, 1, [WriteImm,WriteI,
|
||||||
|
WriteISReg, WriteIEReg,WriteIS,
|
||||||
|
WriteID32,WriteID64,
|
||||||
|
WriteIM32,WriteIM64]>;
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// Subtarget-specific InstRWs.
|
// Subtarget-specific InstRWs.
|
||||||
|
|
||||||
|
//---
|
||||||
|
// Miscellaneous
|
||||||
|
//---
|
||||||
def : InstRW<[WriteI], (instrs COPY)>;
|
def : InstRW<[WriteI], (instrs COPY)>;
|
||||||
def : InstRW<[WriteLD], (instregex "LD[1-4]")>;
|
|
||||||
def : InstRW<[WriteST], (instregex "ST[1-4]")>;
|
//---
|
||||||
def : InstRW<[A53WriteFDiv], (instregex "^FDIV")>;
|
// Vector Mul with Accumulate
|
||||||
def : InstRW<[A53WriteFSqrt], (instregex ".*SQRT.*")>;
|
//---
|
||||||
|
//def : InstRW<[WriteIM32, A53ReadIMA], (instregex "^M(ADD|SUB)W.*")>;
|
||||||
|
//def : InstRW<[WriteIM64, A53ReadIMA], (instregex "^M(ADD|SUB)X.*")>;
|
||||||
|
|
||||||
|
//---
|
||||||
|
// Vector Loads
|
||||||
|
//---
|
||||||
|
def : InstRW<[A53WriteVLD1], (instregex "LD1i(8|16|32|64)(_POST)?$")>;
|
||||||
|
def : InstRW<[A53WriteVLD1], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>;
|
||||||
|
def : InstRW<[A53WriteVLD1], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>;
|
||||||
|
def : InstRW<[A53WriteVLD2], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>;
|
||||||
|
def : InstRW<[A53WriteVLD3], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>;
|
||||||
|
def : InstRW<[A53WriteVLD4], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>;
|
||||||
|
|
||||||
|
def : InstRW<[A53WriteVLD1], (instregex "LD2i(8|16|32|64)(_POST)?$")>;
|
||||||
|
def : InstRW<[A53WriteVLD1], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>;
|
||||||
|
def : InstRW<[A53WriteVLD2], (instregex "LD2Twov(8b|4h|2s)(_POST)?$")>;
|
||||||
|
def : InstRW<[A53WriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)(_POST)?$")>;
|
||||||
|
|
||||||
|
def : InstRW<[A53WriteVLD2], (instregex "LD3i(8|16|32|64)(_POST)?$")>;
|
||||||
|
// LD3Rv: arrangement list kept in step with the LD1Rv/LD2Rv/LD4Rv entries
// (previously ended in `2dq`, which left the `2d` form unmatched here).
def : InstRW<[A53WriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>;
|
||||||
|
def : InstRW<[A53WriteVLD4], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)(_POST)?$")>;
|
||||||
|
def : InstRW<[A53WriteVLD3], (instregex "LD3Threev(2d)(_POST)?$")>;
|
||||||
|
|
||||||
|
def : InstRW<[A53WriteVLD2], (instregex "LD4i(8|16|32|64)(_POST)?$")>;
|
||||||
|
def : InstRW<[A53WriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>;
|
||||||
|
def : InstRW<[A53WriteVLD5], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)(_POST)?$")>;
|
||||||
|
def : InstRW<[A53WriteVLD4], (instregex "LD4Fourv(2d)(_POST)?$")>;
|
||||||
|
|
||||||
|
def : InstRW<[A53WriteVLD1, A53WriteVLD1], (instregex "LDN?PS.*$")>;
|
||||||
|
def : InstRW<[A53WriteVLD2, A53WriteVLD2], (instregex "LDN?PD.*$")>;
|
||||||
|
def : InstRW<[A53WriteVLD4, A53WriteVLD4], (instregex "LDN?PQ.*$")>;
|
||||||
|
|
||||||
|
//---
|
||||||
|
// Vector Stores
|
||||||
|
//---
|
||||||
|
def : InstRW<[A53WriteVST1], (instregex "ST1i(8|16|32|64)(_POST)?$")>;
|
||||||
|
def : InstRW<[A53WriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>;
|
||||||
|
def : InstRW<[A53WriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>;
|
||||||
|
def : InstRW<[A53WriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>;
|
||||||
|
def : InstRW<[A53WriteVST2], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>;
|
||||||
|
|
||||||
|
def : InstRW<[A53WriteVST1], (instregex "ST2i(8|16|32|64)(_POST)?$")>;
|
||||||
|
def : InstRW<[A53WriteVST1], (instregex "ST2Twov(8b|4h|2s)(_POST)?$")>;
|
||||||
|
def : InstRW<[A53WriteVST2], (instregex "ST2Twov(16b|8h|4s|2d)(_POST)?$")>;
|
||||||
|
|
||||||
|
def : InstRW<[A53WriteVST2], (instregex "ST3i(8|16|32|64)(_POST)?$")>;
|
||||||
|
def : InstRW<[A53WriteVST3], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)(_POST)?$")>;
|
||||||
|
def : InstRW<[A53WriteVST2], (instregex "ST3Threev(2d)(_POST)?$")>;
|
||||||
|
|
||||||
|
def : InstRW<[A53WriteVST2], (instregex "ST4i(8|16|32|64)(_POST)?$")>;
|
||||||
|
def : InstRW<[A53WriteVST3], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)(_POST)?$")>;
|
||||||
|
def : InstRW<[A53WriteVST2], (instregex "ST4Fourv(2d)(_POST)?$")>;
|
||||||
|
|
||||||
|
def : InstRW<[A53WriteVST1], (instregex "STN?P(S|D).*$")>;
|
||||||
|
def : InstRW<[A53WriteVST2], (instregex "STN?PQ.*$")>;
|
||||||
|
|
||||||
|
//---
|
||||||
|
// Floating Point MAC, DIV, SQRT
|
||||||
|
//---
|
||||||
|
def : InstRW<[A53WriteFMAC], (instregex "^FN?M(ADD|SUB).*")>;
|
||||||
|
def : InstRW<[A53WriteFMAC], (instregex "^FML(A|S).*")>;
|
||||||
|
def : InstRW<[A53WriteFDivSP], (instrs FDIVSrr)>;
|
||||||
|
def : InstRW<[A53WriteFDivDP], (instrs FDIVDrr)>;
|
||||||
|
def : InstRW<[A53WriteFDivSP], (instregex "^FDIVv.*32$")>;
|
||||||
|
def : InstRW<[A53WriteFDivDP], (instregex "^FDIVv.*64$")>;
|
||||||
|
def : InstRW<[A53WriteFSqrtSP], (instregex "^.*SQRT.*32$")>;
|
||||||
|
def : InstRW<[A53WriteFSqrtDP], (instregex "^.*SQRT.*64$")>;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@@ -851,4 +851,15 @@ def : InstRW<[WriteAdr, WriteVSTPairShuffle], (instregex "ST4i(8|16|32)_POST")>;
|
|||||||
def : InstRW<[WriteVSTShuffle, WriteVSTShuffle], (instrs ST4i64)>;
|
def : InstRW<[WriteVSTShuffle, WriteVSTShuffle], (instrs ST4i64)>;
|
||||||
def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle],(instrs ST4i64_POST)>;
|
def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle],(instrs ST4i64_POST)>;
|
||||||
|
|
||||||
|
//---
|
||||||
|
// Unused SchedRead types
|
||||||
|
//---
|
||||||
|
|
||||||
|
def : ReadAdvance<ReadI, 0>;
|
||||||
|
def : ReadAdvance<ReadISReg, 0>;
|
||||||
|
def : ReadAdvance<ReadIEReg, 0>;
|
||||||
|
def : ReadAdvance<ReadIM, 0>;
|
||||||
|
def : ReadAdvance<ReadIMA, 0>;
|
||||||
|
def : ReadAdvance<ReadID, 0>;
|
||||||
|
|
||||||
} // SchedModel = CycloneModel
|
} // SchedModel = CycloneModel
|
||||||
|
@@ -25,13 +25,19 @@ def WriteImm : SchedWrite; // MOVN, MOVZ
|
|||||||
def WriteI : SchedWrite; // ALU
|
def WriteI : SchedWrite; // ALU
|
||||||
def WriteISReg : SchedWrite; // ALU of Shifted-Reg
|
def WriteISReg : SchedWrite; // ALU of Shifted-Reg
|
||||||
def WriteIEReg : SchedWrite; // ALU of Extended-Reg
|
def WriteIEReg : SchedWrite; // ALU of Extended-Reg
|
||||||
|
def ReadI : SchedRead; // ALU
|
||||||
|
def ReadISReg : SchedRead; // ALU of Shifted-Reg
|
||||||
|
def ReadIEReg : SchedRead; // ALU of Extended-Reg
|
||||||
def WriteExtr : SchedWrite; // EXTR shifts a reg pair
|
def WriteExtr : SchedWrite; // EXTR shifts a reg pair
|
||||||
def ReadExtrHi : SchedRead; // Read the high reg of the EXTR pair
|
def ReadExtrHi : SchedRead; // Read the high reg of the EXTR pair
|
||||||
def WriteIS : SchedWrite; // Shift/Scale
|
def WriteIS : SchedWrite; // Shift/Scale
|
||||||
def WriteID32 : SchedWrite; // 32-bit Divide
|
def WriteID32 : SchedWrite; // 32-bit Divide
|
||||||
def WriteID64 : SchedWrite; // 64-bit Divide
|
def WriteID64 : SchedWrite; // 64-bit Divide
|
||||||
|
def ReadID : SchedRead; // 32/64-bit Divide
|
||||||
def WriteIM32 : SchedWrite; // 32-bit Multiply
|
def WriteIM32 : SchedWrite; // 32-bit Multiply
|
||||||
def WriteIM64 : SchedWrite; // 64-bit Multiply
|
def WriteIM64 : SchedWrite; // 64-bit Multiply
|
||||||
|
def ReadIM : SchedRead; // 32/64-bit Multiply
|
||||||
|
def ReadIMA : SchedRead; // 32/64-bit Multiply Accumulate
|
||||||
def WriteBr : SchedWrite; // Branch
|
def WriteBr : SchedWrite; // Branch
|
||||||
def WriteBrReg : SchedWrite; // Indirect Branch
|
def WriteBrReg : SchedWrite; // Indirect Branch
|
||||||
|
|
||||||
@@ -44,6 +50,9 @@ def WriteLDIdx : SchedWrite; // Load from a register index (maybe scaled).
|
|||||||
def WriteSTIdx : SchedWrite; // Store to a register index (maybe scaled).
|
def WriteSTIdx : SchedWrite; // Store to a register index (maybe scaled).
|
||||||
def ReadAdrBase : SchedRead; // Read the base register of a reg-offset LD/ST.
|
def ReadAdrBase : SchedRead; // Read the base register of a reg-offset LD/ST.
|
||||||
|
|
||||||
|
// Predicate for determining when a shiftable register is shifted.
|
||||||
|
def RegShiftedPred : SchedPredicate<[{TII->hasNonZeroImm(MI)}]>;
|
||||||
|
|
||||||
// ScaledIdxPred is true if a WriteLDIdx operand will be
|
// ScaledIdxPred is true if a WriteLDIdx operand will be
|
||||||
// scaled. Subtargets can use this to dynamically select resources and
|
// scaled. Subtargets can use this to dynamically select resources and
|
||||||
// latency for WriteLDIdx and ReadAdrBase.
|
// latency for WriteLDIdx and ReadAdrBase.
|
||||||
|
@@ -8,9 +8,7 @@
|
|||||||
; CHECK: ********** MI Scheduling **********
|
; CHECK: ********** MI Scheduling **********
|
||||||
; CHECK: main
|
; CHECK: main
|
||||||
; CHECK: *** Final schedule for BB#2 ***
|
; CHECK: *** Final schedule for BB#2 ***
|
||||||
; CHECK: SU(13)
|
|
||||||
; CHECK: MADDWrrr
|
; CHECK: MADDWrrr
|
||||||
; CHECK: SU(4)
|
|
||||||
; CHECK: ADDWri
|
; CHECK: ADDWri
|
||||||
; CHECK: ********** INTERVALS **********
|
; CHECK: ********** INTERVALS **********
|
||||||
@main.x = private unnamed_addr constant [8 x i32] [i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1], align 4
|
@main.x = private unnamed_addr constant [8 x i32] [i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1], align 4
|
||||||
|
21
test/CodeGen/ARM64/misched-forwarding-A53.ll
Normal file
21
test/CodeGen/ARM64/misched-forwarding-A53.ll
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
; REQUIRES: asserts
|
||||||
|
; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a53 -pre-RA-sched=source -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s
|
||||||
|
;
|
||||||
|
; For Cortex-A53, shiftable operands that are not actually shifted
|
||||||
|
; are not needed for an additional two cycles.
|
||||||
|
;
|
||||||
|
; CHECK: ********** MI Scheduling **********
|
||||||
|
; CHECK: shiftable
|
||||||
|
; CHECK: *** Final schedule for BB#0 ***
|
||||||
|
; CHECK: ADDXrr %vreg0, %vreg2
|
||||||
|
; CHECK: ADDXrs %vreg0, %vreg2, 5
|
||||||
|
; CHECK: ********** INTERVALS **********
|
||||||
|
define i64 @shiftable(i64 %A, i64 %B) {
|
||||||
|
%tmp0 = sub i64 %B, 20
|
||||||
|
%tmp1 = shl i64 %tmp0, 5;
|
||||||
|
%tmp2 = add i64 %A, %tmp1;
|
||||||
|
%tmp3 = add i64 %A, %tmp0
|
||||||
|
%tmp4 = mul i64 %tmp2, %tmp3
|
||||||
|
|
||||||
|
ret i64 %tmp4
|
||||||
|
}
|
Reference in New Issue
Block a user