mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-11-23 16:19:52 +00:00
- Add TargetInstrInfo::getOperandLatency() to compute operand latencies. This
allows targets to correctly compute latency for cases where static scheduling itineraries aren't sufficient, e.g. variable_ops instructions such as ARM::ldm. This also allows targets without scheduling itineraries to compute operand latencies, e.g. X86 can return (approximated) latencies for high latency instructions such as division. - Compute operand latencies for those defined by load multiple instructions, e.g. ldm, and those used by store multiple instructions, e.g. stm. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@115755 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@@ -608,6 +608,22 @@ public:
|
|||||||
/// instruction will be decoded to on the target cpu.
|
/// instruction will be decoded to on the target cpu.
|
||||||
virtual unsigned getNumMicroOps(const MachineInstr *MI,
|
virtual unsigned getNumMicroOps(const MachineInstr *MI,
|
||||||
const InstrItineraryData *ItinData) const;
|
const InstrItineraryData *ItinData) const;
|
||||||
|
|
||||||
|
/// getOperandLatency - Compute and return the use operand latency of a given
|
||||||
|
/// itinerary class and operand index if the value is produced by an
|
||||||
|
/// instruction of the specified itinerary class and def operand index.
|
||||||
|
/// In most cases, the static scheduling itinerary is enough to determine the
|
||||||
|
/// operand latency. But it may not be possible for instructions with variable
|
||||||
|
/// number of defs / uses.
|
||||||
|
virtual
|
||||||
|
int getOperandLatency(const InstrItineraryData *ItinData,
|
||||||
|
const MachineInstr *DefMI, unsigned DefIdx,
|
||||||
|
const MachineInstr *UseMI, unsigned UseIdx) const;
|
||||||
|
|
||||||
|
virtual
|
||||||
|
int getOperandLatency(const InstrItineraryData *ItinData,
|
||||||
|
SDNode *DefNode, unsigned DefIdx,
|
||||||
|
SDNode *UseNode, unsigned UseIdx) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
/// TargetInstrInfoImpl - This is the default implementation of
|
/// TargetInstrInfoImpl - This is the default implementation of
|
||||||
|
|||||||
@@ -527,10 +527,10 @@ void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use,
|
|||||||
MachineInstr *DefMI = Def->getInstr();
|
MachineInstr *DefMI = Def->getInstr();
|
||||||
int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
|
int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
|
||||||
if (DefIdx != -1) {
|
if (DefIdx != -1) {
|
||||||
unsigned DefClass = DefMI->getDesc().getSchedClass();
|
const TargetInstrDesc &DefTID = DefMI->getDesc();
|
||||||
MachineInstr *UseMI = Use->getInstr();
|
unsigned DefClass = DefTID.getSchedClass();
|
||||||
unsigned UseClass = UseMI->getDesc().getSchedClass();
|
|
||||||
|
|
||||||
|
MachineInstr *UseMI = Use->getInstr();
|
||||||
// For all uses of the register, calculate the maximum latency
|
// For all uses of the register, calculate the maximum latency
|
||||||
int Latency = -1;
|
int Latency = -1;
|
||||||
for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
|
for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
|
||||||
@@ -541,8 +541,7 @@ void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use,
|
|||||||
if (MOReg != Reg)
|
if (MOReg != Reg)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
int UseCycle = InstrItins->getOperandLatency(DefClass, DefIdx,
|
int UseCycle = TII->getOperandLatency(InstrItins, DefMI, DefIdx, UseMI, i);
|
||||||
UseClass, i);
|
|
||||||
Latency = std::max(Latency, UseCycle);
|
Latency = std::max(Latency, UseCycle);
|
||||||
|
|
||||||
// If we found a latency, then replace the existing dependence latency.
|
// If we found a latency, then replace the existing dependence latency.
|
||||||
|
|||||||
@@ -450,29 +450,11 @@ void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use,
|
|||||||
if (ForceUnitLatencies())
|
if (ForceUnitLatencies())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
if (!InstrItins || InstrItins->isEmpty())
|
|
||||||
return;
|
|
||||||
|
|
||||||
if (dep.getKind() != SDep::Data)
|
if (dep.getKind() != SDep::Data)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
unsigned DefIdx = Use->getOperand(OpIdx).getResNo();
|
unsigned DefIdx = Use->getOperand(OpIdx).getResNo();
|
||||||
if (!Def->isMachineOpcode())
|
int Latency = TII->getOperandLatency(InstrItins, Def, DefIdx, Use, OpIdx);
|
||||||
return;
|
|
||||||
|
|
||||||
const TargetInstrDesc &II = TII->get(Def->getMachineOpcode());
|
|
||||||
if (DefIdx >= II.getNumDefs())
|
|
||||||
return;
|
|
||||||
|
|
||||||
int Latency = 0;
|
|
||||||
if (!Use->isMachineOpcode()) {
|
|
||||||
Latency = InstrItins->getOperandCycle(II.getSchedClass(), DefIdx);
|
|
||||||
} else {
|
|
||||||
unsigned DefClass = II.getSchedClass();
|
|
||||||
unsigned UseClass = TII->get(Use->getMachineOpcode()).getSchedClass();
|
|
||||||
Latency = InstrItins->getOperandLatency(DefClass, DefIdx, UseClass, OpIdx);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (Latency >= 0)
|
if (Latency >= 0)
|
||||||
dep.setLatency(Latency);
|
dep.setLatency(Latency);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1642,3 +1642,164 @@ ARMBaseInstrInfo::getNumMicroOps(const MachineInstr *MI,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
|
||||||
|
const TargetInstrDesc &DefTID,
|
||||||
|
unsigned DefIdx, unsigned DefAlign,
|
||||||
|
const TargetInstrDesc &UseTID,
|
||||||
|
unsigned UseIdx, unsigned UseAlign) const {
|
||||||
|
unsigned DefClass = DefTID.getSchedClass();
|
||||||
|
unsigned UseClass = UseTID.getSchedClass();
|
||||||
|
|
||||||
|
if (DefIdx < DefTID.getNumDefs() && UseIdx < UseTID.getNumOperands())
|
||||||
|
return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
|
||||||
|
|
||||||
|
// This may be a def / use of a variable_ops instruction, the operand
|
||||||
|
// latency might be determinable dynamically. Let the target try to
|
||||||
|
// figure it out.
|
||||||
|
bool LdmBypass = false;
|
||||||
|
int DefCycle = -1;
|
||||||
|
switch (DefTID.getOpcode()) {
|
||||||
|
default:
|
||||||
|
DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
|
||||||
|
break;
|
||||||
|
case ARM::LDM_RET:
|
||||||
|
case ARM::LDM:
|
||||||
|
case ARM::LDM_UPD:
|
||||||
|
case ARM::tLDM:
|
||||||
|
case ARM::tLDM_UPD:
|
||||||
|
case ARM::tPUSH:
|
||||||
|
case ARM::t2LDM_RET:
|
||||||
|
case ARM::t2LDM:
|
||||||
|
case ARM::t2LDM_UPD: {
|
||||||
|
LdmBypass = 1;
|
||||||
|
unsigned RegNo = (DefIdx+1) - DefTID.getNumOperands() + 1;
|
||||||
|
if (Subtarget.isCortexA8()) {
|
||||||
|
// 4 registers would be issued: 1, 2, 1.
|
||||||
|
// 5 registers would be issued: 1, 2, 2.
|
||||||
|
DefCycle = RegNo / 2;
|
||||||
|
if (DefCycle < 1)
|
||||||
|
DefCycle = 1;
|
||||||
|
// Result latency is issue cycle + 2: E2.
|
||||||
|
DefCycle += 2;
|
||||||
|
} else if (Subtarget.isCortexA9()) {
|
||||||
|
DefCycle = (RegNo / 2);
|
||||||
|
// If there are odd number of registers or if it's not 64-bit aligned,
|
||||||
|
// then it takes an extra AGU (Address Generation Unit) cycle.
|
||||||
|
if ((RegNo % 2) || DefAlign < 8)
|
||||||
|
++DefCycle;
|
||||||
|
// Result latency is AGU cycles + 2.
|
||||||
|
DefCycle += 2;
|
||||||
|
} else {
|
||||||
|
// Assume the worst.
|
||||||
|
DefCycle = RegNo + 2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (DefCycle == -1)
|
||||||
|
// We can't seem to determine the result latency of the def, assume it's 2.
|
||||||
|
DefCycle = 2;
|
||||||
|
|
||||||
|
int UseCycle = -1;
|
||||||
|
switch (UseTID.getOpcode()) {
|
||||||
|
default:
|
||||||
|
UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
|
||||||
|
break;
|
||||||
|
case ARM::STM:
|
||||||
|
case ARM::STM_UPD:
|
||||||
|
case ARM::tSTM_UPD:
|
||||||
|
case ARM::tPOP_RET:
|
||||||
|
case ARM::tPOP:
|
||||||
|
case ARM::t2STM:
|
||||||
|
case ARM::t2STM_UPD: {
|
||||||
|
unsigned RegNo = UseIdx - UseTID.getNumOperands() + 1;
|
||||||
|
if (Subtarget.isCortexA8()) {
|
||||||
|
// 4 registers would be issued: 1, 2, 1.
|
||||||
|
// 5 registers would be issued: 1, 2, 2.
|
||||||
|
UseCycle = RegNo / 2;
|
||||||
|
if (UseCycle < 2)
|
||||||
|
UseCycle = 2;
|
||||||
|
// Result latency is issue cycle + 2: E2.
|
||||||
|
UseCycle += 2;
|
||||||
|
} else if (Subtarget.isCortexA9()) {
|
||||||
|
UseCycle = (RegNo / 2);
|
||||||
|
// If there are odd number of registers or if it's not 64-bit aligned,
|
||||||
|
// then it takes an extra AGU (Address Generation Unit) cycle.
|
||||||
|
if ((RegNo % 2) || UseAlign < 8)
|
||||||
|
++UseCycle;
|
||||||
|
// Result latency is AGU cycles + 2.
|
||||||
|
UseCycle += 2;
|
||||||
|
} else {
|
||||||
|
// Assume the worst.
|
||||||
|
UseCycle = RegNo + 2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (UseCycle == -1)
|
||||||
|
// Assume it's read in the first stage.
|
||||||
|
UseCycle = 1;
|
||||||
|
|
||||||
|
UseCycle = DefCycle - UseCycle + 1;
|
||||||
|
if (UseCycle > 0) {
|
||||||
|
if (LdmBypass) {
|
||||||
|
// It's a variable_ops instruction so we can't use DefIdx here. Just use
|
||||||
|
// first def operand.
|
||||||
|
if (ItinData->hasPipelineForwarding(DefClass, DefTID.getNumOperands()-1,
|
||||||
|
UseClass, UseIdx))
|
||||||
|
--UseCycle;
|
||||||
|
} else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
|
||||||
|
UseClass, UseIdx))
|
||||||
|
--UseCycle;
|
||||||
|
}
|
||||||
|
|
||||||
|
return UseCycle;
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
|
||||||
|
const MachineInstr *DefMI, unsigned DefIdx,
|
||||||
|
const MachineInstr *UseMI, unsigned UseIdx) const {
|
||||||
|
if (DefMI->isCopyLike() || DefMI->isInsertSubreg() ||
|
||||||
|
DefMI->isRegSequence() || DefMI->isImplicitDef())
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
const TargetInstrDesc &DefTID = DefMI->getDesc();
|
||||||
|
if (!ItinData || ItinData->isEmpty())
|
||||||
|
return DefTID.mayLoad() ? 3 : 1;
|
||||||
|
|
||||||
|
const TargetInstrDesc &UseTID = UseMI->getDesc();
|
||||||
|
unsigned DefAlign = DefMI->hasOneMemOperand()
|
||||||
|
? (*DefMI->memoperands_begin())->getAlignment() : 0;
|
||||||
|
unsigned UseAlign = UseMI->hasOneMemOperand()
|
||||||
|
? (*UseMI->memoperands_begin())->getAlignment() : 0;
|
||||||
|
return getOperandLatency(ItinData, DefTID, DefIdx, DefAlign,
|
||||||
|
UseTID, UseIdx, UseAlign);
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
|
||||||
|
SDNode *DefNode, unsigned DefIdx,
|
||||||
|
SDNode *UseNode, unsigned UseIdx) const {
|
||||||
|
if (!DefNode->isMachineOpcode())
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
const TargetInstrDesc &DefTID = get(DefNode->getMachineOpcode());
|
||||||
|
if (!ItinData || ItinData->isEmpty())
|
||||||
|
return DefTID.mayLoad() ? 3 : 1;
|
||||||
|
|
||||||
|
if (!UseNode->isMachineOpcode())
|
||||||
|
return ItinData->getOperandCycle(DefTID.getSchedClass(), DefIdx);
|
||||||
|
|
||||||
|
const TargetInstrDesc &UseTID = get(UseNode->getMachineOpcode());
|
||||||
|
const MachineSDNode *DefMN = dyn_cast<MachineSDNode>(DefNode);
|
||||||
|
unsigned DefAlign = !DefMN->memoperands_empty()
|
||||||
|
? (*DefMN->memoperands_begin())->getAlignment() : 0;
|
||||||
|
const MachineSDNode *UseMN = dyn_cast<MachineSDNode>(UseNode);
|
||||||
|
unsigned UseAlign = !UseMN->memoperands_empty()
|
||||||
|
? (*UseMN->memoperands_begin())->getAlignment() : 0;
|
||||||
|
return getOperandLatency(ItinData, DefTID, DefIdx, DefAlign,
|
||||||
|
UseTID, UseIdx, UseAlign);
|
||||||
|
}
|
||||||
|
|||||||
@@ -340,6 +340,21 @@ public:
|
|||||||
|
|
||||||
virtual unsigned getNumMicroOps(const MachineInstr *MI,
|
virtual unsigned getNumMicroOps(const MachineInstr *MI,
|
||||||
const InstrItineraryData *ItinData) const;
|
const InstrItineraryData *ItinData) const;
|
||||||
|
|
||||||
|
virtual
|
||||||
|
int getOperandLatency(const InstrItineraryData *ItinData,
|
||||||
|
const MachineInstr *DefMI, unsigned DefIdx,
|
||||||
|
const MachineInstr *UseMI, unsigned UseIdx) const;
|
||||||
|
virtual
|
||||||
|
int getOperandLatency(const InstrItineraryData *ItinData,
|
||||||
|
SDNode *DefNode, unsigned DefIdx,
|
||||||
|
SDNode *UseNode, unsigned UseIdx) const;
|
||||||
|
private:
|
||||||
|
int getOperandLatency(const InstrItineraryData *ItinData,
|
||||||
|
const TargetInstrDesc &DefTID,
|
||||||
|
unsigned DefIdx, unsigned DefAlign,
|
||||||
|
const TargetInstrDesc &UseTID,
|
||||||
|
unsigned UseIdx, unsigned UseAlign) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
static inline
|
static inline
|
||||||
|
|||||||
@@ -954,7 +954,7 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
|
|||||||
hasExtraDefRegAllocReq = 1 in
|
hasExtraDefRegAllocReq = 1 in
|
||||||
def LDM_RET : AXI4ld<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
|
def LDM_RET : AXI4ld<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
|
||||||
reglist:$dsts, variable_ops),
|
reglist:$dsts, variable_ops),
|
||||||
IndexModeUpd, LdStMulFrm, IIC_iLoadmBr,
|
IndexModeUpd, LdStMulFrm, IIC_iLoad_mBr,
|
||||||
"ldm${addr:submode}${p}\t$addr!, $dsts",
|
"ldm${addr:submode}${p}\t$addr!, $dsts",
|
||||||
"$addr.addr = $wb", []>;
|
"$addr.addr = $wb", []>;
|
||||||
|
|
||||||
@@ -1463,12 +1463,12 @@ def STRHT: AI3sthpo<(outs GPR:$base_wb),
|
|||||||
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
|
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
|
||||||
def LDM : AXI4ld<(outs), (ins addrmode4:$addr, pred:$p,
|
def LDM : AXI4ld<(outs), (ins addrmode4:$addr, pred:$p,
|
||||||
reglist:$dsts, variable_ops),
|
reglist:$dsts, variable_ops),
|
||||||
IndexModeNone, LdStMulFrm, IIC_iLoadm,
|
IndexModeNone, LdStMulFrm, IIC_iLoad_m,
|
||||||
"ldm${addr:submode}${p}\t$addr, $dsts", "", []>;
|
"ldm${addr:submode}${p}\t$addr, $dsts", "", []>;
|
||||||
|
|
||||||
def LDM_UPD : AXI4ld<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
|
def LDM_UPD : AXI4ld<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
|
||||||
reglist:$dsts, variable_ops),
|
reglist:$dsts, variable_ops),
|
||||||
IndexModeUpd, LdStMulFrm, IIC_iLoadm,
|
IndexModeUpd, LdStMulFrm, IIC_iLoad_mu,
|
||||||
"ldm${addr:submode}${p}\t$addr!, $dsts",
|
"ldm${addr:submode}${p}\t$addr!, $dsts",
|
||||||
"$addr.addr = $wb", []>;
|
"$addr.addr = $wb", []>;
|
||||||
} // mayLoad, neverHasSideEffects, hasExtraDefRegAllocReq
|
} // mayLoad, neverHasSideEffects, hasExtraDefRegAllocReq
|
||||||
@@ -1476,12 +1476,12 @@ def LDM_UPD : AXI4ld<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
|
|||||||
let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
|
let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
|
||||||
def STM : AXI4st<(outs), (ins addrmode4:$addr, pred:$p,
|
def STM : AXI4st<(outs), (ins addrmode4:$addr, pred:$p,
|
||||||
reglist:$srcs, variable_ops),
|
reglist:$srcs, variable_ops),
|
||||||
IndexModeNone, LdStMulFrm, IIC_iStorem,
|
IndexModeNone, LdStMulFrm, IIC_iStore_m,
|
||||||
"stm${addr:submode}${p}\t$addr, $srcs", "", []>;
|
"stm${addr:submode}${p}\t$addr, $srcs", "", []>;
|
||||||
|
|
||||||
def STM_UPD : AXI4st<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
|
def STM_UPD : AXI4st<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
|
||||||
reglist:$srcs, variable_ops),
|
reglist:$srcs, variable_ops),
|
||||||
IndexModeUpd, LdStMulFrm, IIC_iStorem,
|
IndexModeUpd, LdStMulFrm, IIC_iStore_mu,
|
||||||
"stm${addr:submode}${p}\t$addr!, $srcs",
|
"stm${addr:submode}${p}\t$addr!, $srcs",
|
||||||
"$addr.addr = $wb", []>;
|
"$addr.addr = $wb", []>;
|
||||||
} // mayStore, neverHasSideEffects, hasExtraSrcRegAllocReq
|
} // mayStore, neverHasSideEffects, hasExtraSrcRegAllocReq
|
||||||
|
|||||||
@@ -280,7 +280,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
|
|||||||
let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
|
let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
|
||||||
hasExtraDefRegAllocReq = 1 in
|
hasExtraDefRegAllocReq = 1 in
|
||||||
def tPOP_RET : T1I<(outs), (ins pred:$p, reglist:$dsts, variable_ops),
|
def tPOP_RET : T1I<(outs), (ins pred:$p, reglist:$dsts, variable_ops),
|
||||||
IIC_iLoadmBr,
|
IIC_iPop_Br,
|
||||||
"pop${p}\t$dsts", []>,
|
"pop${p}\t$dsts", []>,
|
||||||
T1Misc<{1,1,0,?,?,?,?}>;
|
T1Misc<{1,1,0,?,?,?,?}>;
|
||||||
|
|
||||||
@@ -535,13 +535,13 @@ def tSpill : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStore_i,
|
|||||||
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
|
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
|
||||||
def tLDM : T1I<(outs),
|
def tLDM : T1I<(outs),
|
||||||
(ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops),
|
(ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops),
|
||||||
IIC_iLoadm,
|
IIC_iLoad_m,
|
||||||
"ldm${addr:submode}${p}\t$addr, $dsts", []>,
|
"ldm${addr:submode}${p}\t$addr, $dsts", []>,
|
||||||
T1Encoding<{1,1,0,0,1,?}>; // A6.2 & A8.6.53
|
T1Encoding<{1,1,0,0,1,?}>; // A6.2 & A8.6.53
|
||||||
|
|
||||||
def tLDM_UPD : T1It<(outs tGPR:$wb),
|
def tLDM_UPD : T1It<(outs tGPR:$wb),
|
||||||
(ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops),
|
(ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops),
|
||||||
IIC_iLoadm,
|
IIC_iLoad_m,
|
||||||
"ldm${addr:submode}${p}\t$addr!, $dsts",
|
"ldm${addr:submode}${p}\t$addr!, $dsts",
|
||||||
"$addr.addr = $wb", []>,
|
"$addr.addr = $wb", []>,
|
||||||
T1Encoding<{1,1,0,0,1,?}>; // A6.2 & A8.6.53
|
T1Encoding<{1,1,0,0,1,?}>; // A6.2 & A8.6.53
|
||||||
@@ -550,18 +550,20 @@ def tLDM_UPD : T1It<(outs tGPR:$wb),
|
|||||||
let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in
|
let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in
|
||||||
def tSTM_UPD : T1It<(outs tGPR:$wb),
|
def tSTM_UPD : T1It<(outs tGPR:$wb),
|
||||||
(ins addrmode4:$addr, pred:$p, reglist:$srcs, variable_ops),
|
(ins addrmode4:$addr, pred:$p, reglist:$srcs, variable_ops),
|
||||||
IIC_iStorem,
|
IIC_iStore_mu,
|
||||||
"stm${addr:submode}${p}\t$addr!, $srcs",
|
"stm${addr:submode}${p}\t$addr!, $srcs",
|
||||||
"$addr.addr = $wb", []>,
|
"$addr.addr = $wb", []>,
|
||||||
T1Encoding<{1,1,0,0,0,?}>; // A6.2 & A8.6.189
|
T1Encoding<{1,1,0,0,0,?}>; // A6.2 & A8.6.189
|
||||||
|
|
||||||
let mayLoad = 1, Uses = [SP], Defs = [SP], hasExtraDefRegAllocReq = 1 in
|
let mayLoad = 1, Uses = [SP], Defs = [SP], hasExtraDefRegAllocReq = 1 in
|
||||||
def tPOP : T1I<(outs), (ins pred:$p, reglist:$dsts, variable_ops), IIC_iLoadmBr,
|
def tPOP : T1I<(outs), (ins pred:$p, reglist:$dsts, variable_ops),
|
||||||
|
IIC_iPop,
|
||||||
"pop${p}\t$dsts", []>,
|
"pop${p}\t$dsts", []>,
|
||||||
T1Misc<{1,1,0,?,?,?,?}>;
|
T1Misc<{1,1,0,?,?,?,?}>;
|
||||||
|
|
||||||
let mayStore = 1, Uses = [SP], Defs = [SP], hasExtraSrcRegAllocReq = 1 in
|
let mayStore = 1, Uses = [SP], Defs = [SP], hasExtraSrcRegAllocReq = 1 in
|
||||||
def tPUSH : T1I<(outs), (ins pred:$p, reglist:$srcs, variable_ops), IIC_iStorem,
|
def tPUSH : T1I<(outs), (ins pred:$p, reglist:$srcs, variable_ops),
|
||||||
|
IIC_iStore_m,
|
||||||
"push${p}\t$srcs", []>,
|
"push${p}\t$srcs", []>,
|
||||||
T1Misc<{0,1,0,?,?,?,?}>;
|
T1Misc<{0,1,0,?,?,?,?}>;
|
||||||
|
|
||||||
|
|||||||
@@ -1243,7 +1243,7 @@ defm t2PLI : T2Ipl<1, 0, "pli">;
|
|||||||
|
|
||||||
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
|
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
|
||||||
def t2LDM : T2XI<(outs), (ins addrmode4:$addr, pred:$p,
|
def t2LDM : T2XI<(outs), (ins addrmode4:$addr, pred:$p,
|
||||||
reglist:$dsts, variable_ops), IIC_iLoadm,
|
reglist:$dsts, variable_ops), IIC_iLoad_m,
|
||||||
"ldm${addr:submode}${p}${addr:wide}\t$addr, $dsts", []> {
|
"ldm${addr:submode}${p}${addr:wide}\t$addr, $dsts", []> {
|
||||||
let Inst{31-27} = 0b11101;
|
let Inst{31-27} = 0b11101;
|
||||||
let Inst{26-25} = 0b00;
|
let Inst{26-25} = 0b00;
|
||||||
@@ -1254,7 +1254,8 @@ def t2LDM : T2XI<(outs), (ins addrmode4:$addr, pred:$p,
|
|||||||
}
|
}
|
||||||
|
|
||||||
def t2LDM_UPD : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
|
def t2LDM_UPD : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
|
||||||
reglist:$dsts, variable_ops), IIC_iLoadm,
|
reglist:$dsts, variable_ops),
|
||||||
|
IIC_iLoad_mu,
|
||||||
"ldm${addr:submode}${p}${addr:wide}\t$addr!, $dsts",
|
"ldm${addr:submode}${p}${addr:wide}\t$addr!, $dsts",
|
||||||
"$addr.addr = $wb", []> {
|
"$addr.addr = $wb", []> {
|
||||||
let Inst{31-27} = 0b11101;
|
let Inst{31-27} = 0b11101;
|
||||||
@@ -1268,7 +1269,7 @@ def t2LDM_UPD : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
|
|||||||
|
|
||||||
let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
|
let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
|
||||||
def t2STM : T2XI<(outs), (ins addrmode4:$addr, pred:$p,
|
def t2STM : T2XI<(outs), (ins addrmode4:$addr, pred:$p,
|
||||||
reglist:$srcs, variable_ops), IIC_iStorem,
|
reglist:$srcs, variable_ops), IIC_iStore_m,
|
||||||
"stm${addr:submode}${p}${addr:wide}\t$addr, $srcs", []> {
|
"stm${addr:submode}${p}${addr:wide}\t$addr, $srcs", []> {
|
||||||
let Inst{31-27} = 0b11101;
|
let Inst{31-27} = 0b11101;
|
||||||
let Inst{26-25} = 0b00;
|
let Inst{26-25} = 0b00;
|
||||||
@@ -1280,7 +1281,7 @@ def t2STM : T2XI<(outs), (ins addrmode4:$addr, pred:$p,
|
|||||||
|
|
||||||
def t2STM_UPD : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
|
def t2STM_UPD : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
|
||||||
reglist:$srcs, variable_ops),
|
reglist:$srcs, variable_ops),
|
||||||
IIC_iStorem,
|
IIC_iStore_m,
|
||||||
"stm${addr:submode}${p}${addr:wide}\t$addr!, $srcs",
|
"stm${addr:submode}${p}${addr:wide}\t$addr!, $srcs",
|
||||||
"$addr.addr = $wb", []> {
|
"$addr.addr = $wb", []> {
|
||||||
let Inst{31-27} = 0b11101;
|
let Inst{31-27} = 0b11101;
|
||||||
@@ -2473,7 +2474,7 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
|
|||||||
hasExtraDefRegAllocReq = 1 in
|
hasExtraDefRegAllocReq = 1 in
|
||||||
def t2LDM_RET : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
|
def t2LDM_RET : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
|
||||||
reglist:$dsts, variable_ops),
|
reglist:$dsts, variable_ops),
|
||||||
IIC_iLoadmBr,
|
IIC_iLoad_mBr,
|
||||||
"ldm${addr:submode}${p}${addr:wide}\t$addr!, $dsts",
|
"ldm${addr:submode}${p}${addr:wide}\t$addr!, $dsts",
|
||||||
"$addr.addr = $wb", []> {
|
"$addr.addr = $wb", []> {
|
||||||
let Inst{31-27} = 0b11101;
|
let Inst{31-27} = 0b11101;
|
||||||
|
|||||||
@@ -67,8 +67,11 @@ def IIC_iLoad_bh_siu : InstrItinClass;
|
|||||||
def IIC_iLoad_d_i : InstrItinClass;
|
def IIC_iLoad_d_i : InstrItinClass;
|
||||||
def IIC_iLoad_d_r : InstrItinClass;
|
def IIC_iLoad_d_r : InstrItinClass;
|
||||||
def IIC_iLoad_d_ru : InstrItinClass;
|
def IIC_iLoad_d_ru : InstrItinClass;
|
||||||
def IIC_iLoadm : InstrItinClass<0>; // micro-coded
|
def IIC_iLoad_m : InstrItinClass<0>; // micro-coded
|
||||||
def IIC_iLoadmBr : InstrItinClass<0>; // micro-coded
|
def IIC_iLoad_mu : InstrItinClass<0>; // micro-coded
|
||||||
|
def IIC_iLoad_mBr : InstrItinClass<0>; // micro-coded
|
||||||
|
def IIC_iPop : InstrItinClass<0>; // micro-coded
|
||||||
|
def IIC_iPop_Br : InstrItinClass<0>; // micro-coded
|
||||||
def IIC_iLoadiALU : InstrItinClass;
|
def IIC_iLoadiALU : InstrItinClass;
|
||||||
def IIC_iStore_i : InstrItinClass;
|
def IIC_iStore_i : InstrItinClass;
|
||||||
def IIC_iStore_r : InstrItinClass;
|
def IIC_iStore_r : InstrItinClass;
|
||||||
@@ -85,7 +88,8 @@ def IIC_iStore_bh_siu : InstrItinClass;
|
|||||||
def IIC_iStore_d_i : InstrItinClass;
|
def IIC_iStore_d_i : InstrItinClass;
|
||||||
def IIC_iStore_d_r : InstrItinClass;
|
def IIC_iStore_d_r : InstrItinClass;
|
||||||
def IIC_iStore_d_ru : InstrItinClass;
|
def IIC_iStore_d_ru : InstrItinClass;
|
||||||
def IIC_iStorem : InstrItinClass<0>; // micro-coded
|
def IIC_iStore_m : InstrItinClass<0>; // micro-coded
|
||||||
|
def IIC_iStore_mu : InstrItinClass<0>; // micro-coded
|
||||||
def IIC_Br : InstrItinClass;
|
def IIC_Br : InstrItinClass;
|
||||||
def IIC_fpSTAT : InstrItinClass;
|
def IIC_fpSTAT : InstrItinClass;
|
||||||
def IIC_fpUNA32 : InstrItinClass;
|
def IIC_fpUNA32 : InstrItinClass;
|
||||||
|
|||||||
@@ -172,21 +172,44 @@ def CortexA8Itineraries : ProcessorItineraries<
|
|||||||
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
|
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
|
||||||
InstrStage<1, [A8_LdSt0]>], [4, 3, 1, 1]>,
|
InstrStage<1, [A8_LdSt0]>], [4, 3, 1, 1]>,
|
||||||
//
|
//
|
||||||
// Load multiple
|
// Load multiple, def is the 5th operand.
|
||||||
InstrItinData<IIC_iLoadm , [InstrStage<2, [A8_Issue], 0>,
|
InstrItinData<IIC_iLoad_m , [InstrStage<2, [A8_Issue], 0>,
|
||||||
InstrStage<2, [A8_Pipe0], 0>,
|
InstrStage<2, [A8_Pipe0], 0>,
|
||||||
InstrStage<2, [A8_Pipe1]>,
|
InstrStage<2, [A8_Pipe1]>,
|
||||||
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
|
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
|
||||||
InstrStage<1, [A8_LdSt0]>]>,
|
InstrStage<1, [A8_LdSt0]>], [1, 1, 1, 1, 3]>,
|
||||||
|
//
|
||||||
|
// Load multiple + update, defs are the 1st and 5th operands.
|
||||||
|
InstrItinData<IIC_iLoad_mu , [InstrStage<2, [A8_Issue], 0>,
|
||||||
|
InstrStage<2, [A8_Pipe0], 0>,
|
||||||
|
InstrStage<2, [A8_Pipe1]>,
|
||||||
|
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
|
||||||
|
InstrStage<1, [A8_LdSt0]>], [2, 1, 1, 1, 3]>,
|
||||||
//
|
//
|
||||||
// Load multiple plus branch
|
// Load multiple plus branch
|
||||||
InstrItinData<IIC_iLoadmBr , [InstrStage<2, [A8_Issue], 0>,
|
InstrItinData<IIC_iLoad_mBr, [InstrStage<2, [A8_Issue], 0>,
|
||||||
InstrStage<2, [A8_Pipe0], 0>,
|
InstrStage<2, [A8_Pipe0], 0>,
|
||||||
InstrStage<2, [A8_Pipe1]>,
|
InstrStage<2, [A8_Pipe1]>,
|
||||||
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
|
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
|
||||||
InstrStage<1, [A8_LdSt0]>,
|
InstrStage<1, [A8_LdSt0]>,
|
||||||
InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>,
|
InstrStage<1, [A8_Pipe0, A8_Pipe1]>],
|
||||||
|
[1, 2, 1, 1, 3]>,
|
||||||
|
//
|
||||||
|
// Pop, def is the 3rd operand.
|
||||||
|
InstrItinData<IIC_iPop , [InstrStage<2, [A8_Issue], 0>,
|
||||||
|
InstrStage<2, [A8_Pipe0], 0>,
|
||||||
|
InstrStage<2, [A8_Pipe1]>,
|
||||||
|
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
|
||||||
|
InstrStage<1, [A8_LdSt0]>], [1, 1, 3]>,
|
||||||
|
//
|
||||||
|
// Pop + branch, def is the 3rd operand.
|
||||||
|
InstrItinData<IIC_iPop_Br, [InstrStage<2, [A8_Issue], 0>,
|
||||||
|
InstrStage<2, [A8_Pipe0], 0>,
|
||||||
|
InstrStage<2, [A8_Pipe1]>,
|
||||||
|
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
|
||||||
|
InstrStage<1, [A8_LdSt0]>,
|
||||||
|
InstrStage<1, [A8_Pipe0, A8_Pipe1]>],
|
||||||
|
[1, 1, 3]>,
|
||||||
|
|
||||||
//
|
//
|
||||||
// iLoadi + iALUr for t2LDRpci_pic.
|
// iLoadi + iALUr for t2LDRpci_pic.
|
||||||
@@ -266,11 +289,18 @@ def CortexA8Itineraries : ProcessorItineraries<
|
|||||||
InstrStage<1, [A8_LdSt0]>], [3, 3, 1, 1]>,
|
InstrStage<1, [A8_LdSt0]>], [3, 3, 1, 1]>,
|
||||||
//
|
//
|
||||||
// Store multiple
|
// Store multiple
|
||||||
InstrItinData<IIC_iStorem , [InstrStage<2, [A8_Issue], 0>,
|
InstrItinData<IIC_iStore_m , [InstrStage<2, [A8_Issue], 0>,
|
||||||
InstrStage<2, [A8_Pipe0], 0>,
|
InstrStage<2, [A8_Pipe0], 0>,
|
||||||
InstrStage<2, [A8_Pipe1]>,
|
InstrStage<2, [A8_Pipe1]>,
|
||||||
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
|
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
|
||||||
InstrStage<1, [A8_LdSt0]>]>,
|
InstrStage<1, [A8_LdSt0]>]>,
|
||||||
|
//
|
||||||
|
// Store multiple + update
|
||||||
|
InstrItinData<IIC_iStore_mu, [InstrStage<2, [A8_Issue], 0>,
|
||||||
|
InstrStage<2, [A8_Pipe0], 0>,
|
||||||
|
InstrStage<2, [A8_Pipe1]>,
|
||||||
|
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
|
||||||
|
InstrStage<1, [A8_LdSt0]>], [2]>,
|
||||||
|
|
||||||
// Branch
|
// Branch
|
||||||
//
|
//
|
||||||
|
|||||||
@@ -242,18 +242,42 @@ def CortexA9Itineraries : ProcessorItineraries<
|
|||||||
InstrStage<2, [A9_AGU]>],
|
InstrStage<2, [A9_AGU]>],
|
||||||
[5, 4, 1, 1], [A9_LdBypass]>,
|
[5, 4, 1, 1], [A9_LdBypass]>,
|
||||||
//
|
//
|
||||||
// Load multiple
|
// Load multiple, def is the 5th operand.
|
||||||
InstrItinData<IIC_iLoadm , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
|
InstrItinData<IIC_iLoad_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
|
||||||
InstrStage<1, [A9_MUX0], 0>,
|
InstrStage<1, [A9_MUX0], 0>,
|
||||||
InstrStage<2, [A9_AGU]>],
|
InstrStage<2, [A9_AGU]>],
|
||||||
[3], [A9_LdBypass]>,
|
[1, 1, 1, 1, 3],
|
||||||
|
[NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
|
||||||
|
//
|
||||||
|
// Load multiple + update, defs are the 1st and 5th operands.
|
||||||
|
InstrItinData<IIC_iLoad_mu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
|
||||||
|
InstrStage<1, [A9_MUX0], 0>,
|
||||||
|
InstrStage<2, [A9_AGU]>],
|
||||||
|
[2, 1, 1, 1, 3],
|
||||||
|
[NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
|
||||||
//
|
//
|
||||||
// Load multiple plus branch
|
// Load multiple plus branch
|
||||||
InstrItinData<IIC_iLoadmBr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
|
InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
|
||||||
InstrStage<1, [A9_MUX0], 0>,
|
InstrStage<1, [A9_MUX0], 0>,
|
||||||
InstrStage<1, [A9_AGU]>,
|
InstrStage<1, [A9_AGU]>,
|
||||||
InstrStage<1, [A9_Branch]>]>,
|
InstrStage<1, [A9_Branch]>],
|
||||||
|
[1, 2, 1, 1, 3],
|
||||||
|
[NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
|
||||||
|
//
|
||||||
|
// Pop, def is the 3rd operand.
|
||||||
|
InstrItinData<IIC_iPop , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
|
||||||
|
InstrStage<1, [A9_MUX0], 0>,
|
||||||
|
InstrStage<2, [A9_AGU]>],
|
||||||
|
[1, 1, 3],
|
||||||
|
[NoBypass, NoBypass, A9_LdBypass]>,
|
||||||
|
//
|
||||||
|
// Pop + branch, def is the 3rd operand.
|
||||||
|
InstrItinData<IIC_iPop_Br, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
|
||||||
|
InstrStage<1, [A9_MUX0], 0>,
|
||||||
|
InstrStage<2, [A9_AGU]>,
|
||||||
|
InstrStage<1, [A9_Branch]>],
|
||||||
|
[1, 1, 3],
|
||||||
|
[NoBypass, NoBypass, A9_LdBypass]>,
|
||||||
|
|
||||||
//
|
//
|
||||||
// iLoadi + iALUr for t2LDRpci_pic.
|
// iLoadi + iALUr for t2LDRpci_pic.
|
||||||
@@ -329,13 +353,21 @@ def CortexA9Itineraries : ProcessorItineraries<
|
|||||||
[3, 1, 1, 1]>,
|
[3, 1, 1, 1]>,
|
||||||
//
|
//
|
||||||
// Store multiple
|
// Store multiple
|
||||||
InstrItinData<IIC_iStorem , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
|
InstrItinData<IIC_iStore_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
|
||||||
InstrStage<1, [A9_MUX0], 0>,
|
InstrStage<1, [A9_MUX0], 0>,
|
||||||
InstrStage<1, [A9_AGU]>]>,
|
InstrStage<1, [A9_AGU]>]>,
|
||||||
|
//
|
||||||
|
// Store multiple + update
|
||||||
|
InstrItinData<IIC_iStore_mu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
|
||||||
|
InstrStage<1, [A9_MUX0], 0>,
|
||||||
|
InstrStage<1, [A9_AGU]>], [2]>,
|
||||||
|
|
||||||
// Branch
|
// Branch
|
||||||
//
|
//
|
||||||
// no delay slots, so the latency of a branch is unimportant
|
// no delay slots, so the latency of a branch is unimportant
|
||||||
InstrItinData<IIC_Br , [InstrStage<1, [A9_Branch]>]>,
|
InstrItinData<IIC_Br , [InstrStage<1, [A9_Issue0], 0>,
|
||||||
|
InstrStage<1, [A9_Issue1], 0>,
|
||||||
|
InstrStage<1, [A9_Branch]>]>,
|
||||||
|
|
||||||
// VFP and NEON shares the same register file. This means that every VFP
|
// VFP and NEON shares the same register file. This means that every VFP
|
||||||
// instruction should wait for full completion of the consecutive NEON
|
// instruction should wait for full completion of the consecutive NEON
|
||||||
|
|||||||
@@ -116,19 +116,29 @@ def ARMV6Itineraries : ProcessorItineraries<
|
|||||||
InstrItinData<IIC_iLoad_bh_siu,[InstrStage<2, [V6_Pipe]>], [5, 2, 2, 1]>,
|
InstrItinData<IIC_iLoad_bh_siu,[InstrStage<2, [V6_Pipe]>], [5, 2, 2, 1]>,
|
||||||
|
|
||||||
//
|
//
|
||||||
// Load multiple
|
// Load multiple, def is the 5th operand.
|
||||||
InstrItinData<IIC_iLoadm , [InstrStage<3, [V6_Pipe]>]>,
|
InstrItinData<IIC_iLoad_m , [InstrStage<3, [V6_Pipe]>], [1, 1, 1, 1, 4]>,
|
||||||
|
//
|
||||||
|
// Load multiple + update, defs are the 1st and 5th operands.
|
||||||
|
InstrItinData<IIC_iLoad_mu , [InstrStage<3, [V6_Pipe]>], [2, 1, 1, 1, 4]>,
|
||||||
//
|
//
|
||||||
// Load multiple plus branch
|
// Load multiple plus branch
|
||||||
InstrItinData<IIC_iLoadmBr , [InstrStage<3, [V6_Pipe]>,
|
InstrItinData<IIC_iLoad_mBr, [InstrStage<3, [V6_Pipe]>,
|
||||||
InstrStage<1, [V6_Pipe]>]>,
|
InstrStage<1, [V6_Pipe]>], [1, 2, 1, 1, 4]>,
|
||||||
|
|
||||||
//
|
//
|
||||||
// iLoadi + iALUr for t2LDRpci_pic.
|
// iLoadi + iALUr for t2LDRpci_pic.
|
||||||
InstrItinData<IIC_iLoadiALU, [InstrStage<1, [V6_Pipe]>,
|
InstrItinData<IIC_iLoadiALU, [InstrStage<1, [V6_Pipe]>,
|
||||||
InstrStage<1, [V6_Pipe]>], [3, 1]>,
|
InstrStage<1, [V6_Pipe]>], [3, 1]>,
|
||||||
|
|
||||||
|
//
|
||||||
|
// Pop, def is the 3rd operand.
|
||||||
|
InstrItinData<IIC_iPop , [InstrStage<3, [V6_Pipe]>], [1, 1, 4]>,
|
||||||
|
//
|
||||||
|
// Pop + branch, def is the 3rd operand.
|
||||||
|
InstrItinData<IIC_iPop_Br, [InstrStage<3, [V6_Pipe]>,
|
||||||
|
InstrStage<1, [V6_Pipe]>], [1, 2, 4]>,
|
||||||
|
|
||||||
// Integer store pipeline
|
// Integer store pipeline
|
||||||
//
|
//
|
||||||
// Immediate offset
|
// Immediate offset
|
||||||
@@ -159,7 +169,10 @@ def ARMV6Itineraries : ProcessorItineraries<
|
|||||||
InstrItinData<IIC_iStore_bh_siu,[InstrStage<2, [V6_Pipe]>], [2, 2, 2, 1]>,
|
InstrItinData<IIC_iStore_bh_siu,[InstrStage<2, [V6_Pipe]>], [2, 2, 2, 1]>,
|
||||||
//
|
//
|
||||||
// Store multiple
|
// Store multiple
|
||||||
InstrItinData<IIC_iStorem , [InstrStage<3, [V6_Pipe]>]>,
|
InstrItinData<IIC_iStore_m , [InstrStage<3, [V6_Pipe]>]>,
|
||||||
|
//
|
||||||
|
// Store multiple + update
|
||||||
|
InstrItinData<IIC_iStore_mu , [InstrStage<3, [V6_Pipe]>], [2]>,
|
||||||
|
|
||||||
// Branch
|
// Branch
|
||||||
//
|
//
|
||||||
|
|||||||
@@ -12,9 +12,10 @@
|
|||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
#include "llvm/Target/TargetInstrInfo.h"
|
#include "llvm/Target/TargetInstrInfo.h"
|
||||||
#include "llvm/MC/MCAsmInfo.h"
|
|
||||||
#include "llvm/Target/TargetInstrItineraries.h"
|
#include "llvm/Target/TargetInstrItineraries.h"
|
||||||
#include "llvm/Target/TargetRegisterInfo.h"
|
#include "llvm/Target/TargetRegisterInfo.h"
|
||||||
|
#include "llvm/CodeGen/SelectionDAGNodes.h"
|
||||||
|
#include "llvm/MC/MCAsmInfo.h"
|
||||||
#include "llvm/Support/ErrorHandling.h"
|
#include "llvm/Support/ErrorHandling.h"
|
||||||
using namespace llvm;
|
using namespace llvm;
|
||||||
|
|
||||||
@@ -64,6 +65,36 @@ TargetInstrInfo::getNumMicroOps(const MachineInstr *MI,
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// getOperandLatency - Default implementation of the MachineInstr overload.
/// Looks up the scheduling class of the defining and the using instruction
/// and forwards both, together with the def / use operand indices, to
/// InstrItineraryData::getOperandLatency.
///
/// \param ItinData itinerary data to query; may be null.
/// \param DefMI    instruction that produces the value.
/// \param DefIdx   operand index of the def within DefMI.
/// \param UseMI    instruction that consumes the value.
/// \param UseIdx   operand index of the use within UseMI.
/// \returns -1 when ItinData is null or empty, otherwise whatever the
///          itinerary lookup returns.
int
|
||||||
|
TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
|
||||||
|
const MachineInstr *DefMI, unsigned DefIdx,
|
||||||
|
const MachineInstr *UseMI, unsigned UseIdx) const {
|
||||||
|
// Without itinerary data there is nothing to look up.
// NOTE(review): -1 presumably means "latency unknown" to callers -- confirm.
if (!ItinData || ItinData->isEmpty())
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
// Scheduling (itinerary) class of the producer and of the consumer.
unsigned DefClass = DefMI->getDesc().getSchedClass();
|
||||||
|
unsigned UseClass = UseMI->getDesc().getSchedClass();
|
||||||
|
return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// getOperandLatency - Default implementation of the SDNode overload.
/// Same lookup as the MachineInstr overload, but the scheduling classes are
/// obtained from the machine opcodes of the two SelectionDAG nodes.
///
/// \param ItinData itinerary data to query; may be null.
/// \param DefNode  node that produces the value.
/// \param DefIdx   operand index of the def within DefNode.
/// \param UseNode  node that consumes the value.
/// \param UseIdx   operand index of the use within UseNode.
/// \returns -1 when ItinData is null or empty, or when DefNode is not a
///          machine opcode; the def's operand cycle when only UseNode is not
///          a machine opcode; otherwise the result of the itinerary's
///          def/use operand latency lookup.
int
|
||||||
|
TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
|
||||||
|
SDNode *DefNode, unsigned DefIdx,
|
||||||
|
SDNode *UseNode, unsigned UseIdx) const {
|
||||||
|
// Without itinerary data there is nothing to look up.
if (!ItinData || ItinData->isEmpty())
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
// The def's scheduling class is only looked up for machine opcodes.
if (!DefNode->isMachineOpcode())
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
unsigned DefClass = get(DefNode->getMachineOpcode()).getSchedClass();
|
||||||
|
// The user is not a machine opcode: fall back to the def-side operand cycle
// alone, since no use-side scheduling class is looked up in this case.
if (!UseNode->isMachineOpcode())
|
||||||
|
return ItinData->getOperandCycle(DefClass, DefIdx);
|
||||||
|
unsigned UseClass = get(UseNode->getMachineOpcode()).getSchedClass();
|
||||||
|
return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/// insertNoop - Insert a noop into the instruction stream at the specified
|
/// insertNoop - Insert a noop into the instruction stream at the specified
|
||||||
/// point.
|
/// point.
|
||||||
void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB,
|
void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB,
|
||||||
|
|||||||
@@ -13,8 +13,8 @@ define i32 @f1(i32 %a, i64 %b) {
|
|||||||
; test that allocating the double to r2/r3 makes r1 unavailable on gnueabi.
|
; test that allocating the double to r2/r3 makes r1 unavailable on gnueabi.
|
||||||
define i32 @f2() nounwind optsize {
|
define i32 @f2() nounwind optsize {
|
||||||
; ELF: f2:
|
; ELF: f2:
|
||||||
; ELF: mov r0, #128
|
; ELF: mov [[REGISTER:(r[0-9]+)]], #128
|
||||||
; ELF: str r0, [sp]
|
; ELF: str [[REGISTER]], [sp]
|
||||||
; DARWIN: f2:
|
; DARWIN: f2:
|
||||||
; DARWIN: mov r3, #128
|
; DARWIN: mov r3, #128
|
||||||
entry:
|
entry:
|
||||||
|
|||||||
@@ -79,9 +79,9 @@ define double @f7(double %a, double %b) {
|
|||||||
; CHECK-NEON: movw [[REGISTER_1:r[0-9]+]], #1123
|
; CHECK-NEON: movw [[REGISTER_1:r[0-9]+]], #1123
|
||||||
; CHECK-NEON-NEXT: movs [[REGISTER_2:r[0-9]+]], #0
|
; CHECK-NEON-NEXT: movs [[REGISTER_2:r[0-9]+]], #0
|
||||||
; CHECK-NEON-NEXT: cmp r0, [[REGISTER_1]]
|
; CHECK-NEON-NEXT: cmp r0, [[REGISTER_1]]
|
||||||
; CHECK-NEON-NEXT: adr [[REGISTER_3:r[0-9]+]], #LCPI
|
|
||||||
; CHECK-NEON-NEXT: it eq
|
; CHECK-NEON-NEXT: it eq
|
||||||
; CHECK-NEON-NEXT: moveq [[REGISTER_2]], #4
|
; CHECK-NEON-NEXT: moveq [[REGISTER_2]], #4
|
||||||
|
; CHECK-NEON-NEXT: adr [[REGISTER_3:r[0-9]+]], #LCPI
|
||||||
; CHECK-NEON-NEXT: ldr
|
; CHECK-NEON-NEXT: ldr
|
||||||
; CHECK-NEON: bx
|
; CHECK-NEON: bx
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user