mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-07-16 11:24:39 +00:00
Convert VLD1 and VLD2 instructions to use pseudo-instructions until
after regalloc.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@112825 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@@ -48,6 +48,8 @@ namespace {
|
|||||||
void TransferImpOps(MachineInstr &OldMI,
|
void TransferImpOps(MachineInstr &OldMI,
|
||||||
MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI);
|
MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI);
|
||||||
bool ExpandMBB(MachineBasicBlock &MBB);
|
bool ExpandMBB(MachineBasicBlock &MBB);
|
||||||
|
void ExpandVLD(MachineBasicBlock::iterator &MBBI, unsigned Opc,
|
||||||
|
bool hasWriteBack, NEONRegSpacing RegSpc, unsigned NumRegs);
|
||||||
void ExpandVST(MachineBasicBlock::iterator &MBBI, unsigned Opc,
|
void ExpandVST(MachineBasicBlock::iterator &MBBI, unsigned Opc,
|
||||||
bool hasWriteBack, NEONRegSpacing RegSpc, unsigned NumRegs);
|
bool hasWriteBack, NEONRegSpacing RegSpc, unsigned NumRegs);
|
||||||
};
|
};
|
||||||
@@ -72,6 +74,66 @@ void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// ExpandVLD -
|
||||||
|
///
|
||||||
|
void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI,
|
||||||
|
unsigned Opc, bool hasWriteBack,
|
||||||
|
NEONRegSpacing RegSpc, unsigned NumRegs) {
|
||||||
|
MachineInstr &MI = *MBBI;
|
||||||
|
MachineBasicBlock &MBB = *MI.getParent();
|
||||||
|
|
||||||
|
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc));
|
||||||
|
unsigned OpIdx = 0;
|
||||||
|
|
||||||
|
bool DstIsDead = MI.getOperand(OpIdx).isDead();
|
||||||
|
unsigned DstReg = MI.getOperand(OpIdx++).getReg();
|
||||||
|
unsigned D0, D1, D2, D3;
|
||||||
|
if (RegSpc == SingleSpc) {
|
||||||
|
D0 = TRI->getSubReg(DstReg, ARM::dsub_0);
|
||||||
|
D1 = TRI->getSubReg(DstReg, ARM::dsub_1);
|
||||||
|
D2 = TRI->getSubReg(DstReg, ARM::dsub_2);
|
||||||
|
D3 = TRI->getSubReg(DstReg, ARM::dsub_3);
|
||||||
|
} else if (RegSpc == EvenDblSpc) {
|
||||||
|
D0 = TRI->getSubReg(DstReg, ARM::dsub_0);
|
||||||
|
D1 = TRI->getSubReg(DstReg, ARM::dsub_2);
|
||||||
|
D2 = TRI->getSubReg(DstReg, ARM::dsub_4);
|
||||||
|
D3 = TRI->getSubReg(DstReg, ARM::dsub_6);
|
||||||
|
} else {
|
||||||
|
assert(RegSpc == OddDblSpc && "unknown register spacing for VLD");
|
||||||
|
D0 = TRI->getSubReg(DstReg, ARM::dsub_1);
|
||||||
|
D1 = TRI->getSubReg(DstReg, ARM::dsub_3);
|
||||||
|
D2 = TRI->getSubReg(DstReg, ARM::dsub_5);
|
||||||
|
D3 = TRI->getSubReg(DstReg, ARM::dsub_7);
|
||||||
|
}
|
||||||
|
MIB.addReg(D0).addReg(D1);
|
||||||
|
if (NumRegs > 2)
|
||||||
|
MIB.addReg(D2);
|
||||||
|
if (NumRegs > 3)
|
||||||
|
MIB.addReg(D3);
|
||||||
|
|
||||||
|
if (hasWriteBack) {
|
||||||
|
bool WBIsDead = MI.getOperand(OpIdx).isDead();
|
||||||
|
unsigned WBReg = MI.getOperand(OpIdx++).getReg();
|
||||||
|
MIB.addReg(WBReg, getDefRegState(true) | getDeadRegState(WBIsDead));
|
||||||
|
}
|
||||||
|
// Copy the addrmode6 operands.
|
||||||
|
bool AddrIsKill = MI.getOperand(OpIdx).isKill();
|
||||||
|
MIB.addReg(MI.getOperand(OpIdx++).getReg(), getKillRegState(AddrIsKill));
|
||||||
|
MIB.addImm(MI.getOperand(OpIdx++).getImm());
|
||||||
|
if (hasWriteBack) {
|
||||||
|
// Copy the am6offset operand.
|
||||||
|
bool OffsetIsKill = MI.getOperand(OpIdx).isKill();
|
||||||
|
MIB.addReg(MI.getOperand(OpIdx++).getReg(), getKillRegState(OffsetIsKill));
|
||||||
|
}
|
||||||
|
|
||||||
|
MIB = AddDefaultPred(MIB);
|
||||||
|
TransferImpOps(MI, MIB, MIB);
|
||||||
|
// Add an implicit def for the super-reg.
|
||||||
|
MIB.addReg(DstReg, (getDefRegState(true) | getDeadRegState(DstIsDead) |
|
||||||
|
getImplRegState(true)));
|
||||||
|
MI.eraseFromParent();
|
||||||
|
}
|
||||||
|
|
||||||
/// ExpandVST - Translate VST pseudo instructions with Q, QQ or QQQQ register
|
/// ExpandVST - Translate VST pseudo instructions with Q, QQ or QQQQ register
|
||||||
/// operands to real VST instructions with D register operands.
|
/// operands to real VST instructions with D register operands.
|
||||||
void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI,
|
void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI,
|
||||||
@@ -232,6 +294,58 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
|
|||||||
MI.eraseFromParent();
|
MI.eraseFromParent();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case ARM::VLD1q8Pseudo:
|
||||||
|
ExpandVLD(MBBI, ARM::VLD1q8, false, SingleSpc, 2); break;
|
||||||
|
case ARM::VLD1q16Pseudo:
|
||||||
|
ExpandVLD(MBBI, ARM::VLD1q16, false, SingleSpc, 2); break;
|
||||||
|
case ARM::VLD1q32Pseudo:
|
||||||
|
ExpandVLD(MBBI, ARM::VLD1q32, false, SingleSpc, 2); break;
|
||||||
|
case ARM::VLD1q64Pseudo:
|
||||||
|
ExpandVLD(MBBI, ARM::VLD1q64, false, SingleSpc, 2); break;
|
||||||
|
case ARM::VLD1q8Pseudo_UPD:
|
||||||
|
ExpandVLD(MBBI, ARM::VLD1q8, true, SingleSpc, 2); break;
|
||||||
|
case ARM::VLD1q16Pseudo_UPD:
|
||||||
|
ExpandVLD(MBBI, ARM::VLD1q16, true, SingleSpc, 2); break;
|
||||||
|
case ARM::VLD1q32Pseudo_UPD:
|
||||||
|
ExpandVLD(MBBI, ARM::VLD1q32, true, SingleSpc, 2); break;
|
||||||
|
case ARM::VLD1q64Pseudo_UPD:
|
||||||
|
ExpandVLD(MBBI, ARM::VLD1q64, true, SingleSpc, 2); break;
|
||||||
|
|
||||||
|
case ARM::VLD2d8Pseudo:
|
||||||
|
ExpandVLD(MBBI, ARM::VLD2d8, false, SingleSpc, 2); break;
|
||||||
|
case ARM::VLD2d16Pseudo:
|
||||||
|
ExpandVLD(MBBI, ARM::VLD2d16, false, SingleSpc, 2); break;
|
||||||
|
case ARM::VLD2d32Pseudo:
|
||||||
|
ExpandVLD(MBBI, ARM::VLD2d32, false, SingleSpc, 2); break;
|
||||||
|
case ARM::VLD2q8Pseudo:
|
||||||
|
ExpandVLD(MBBI, ARM::VLD2q8, false, SingleSpc, 4); break;
|
||||||
|
case ARM::VLD2q16Pseudo:
|
||||||
|
ExpandVLD(MBBI, ARM::VLD2q16, false, SingleSpc, 4); break;
|
||||||
|
case ARM::VLD2q32Pseudo:
|
||||||
|
ExpandVLD(MBBI, ARM::VLD2q32, false, SingleSpc, 4); break;
|
||||||
|
case ARM::VLD2d8Pseudo_UPD:
|
||||||
|
ExpandVLD(MBBI, ARM::VLD2d8, true, SingleSpc, 2); break;
|
||||||
|
case ARM::VLD2d16Pseudo_UPD:
|
||||||
|
ExpandVLD(MBBI, ARM::VLD2d16, true, SingleSpc, 2); break;
|
||||||
|
case ARM::VLD2d32Pseudo_UPD:
|
||||||
|
ExpandVLD(MBBI, ARM::VLD2d32, true, SingleSpc, 2); break;
|
||||||
|
case ARM::VLD2q8Pseudo_UPD:
|
||||||
|
ExpandVLD(MBBI, ARM::VLD2q8, true, SingleSpc, 4); break;
|
||||||
|
case ARM::VLD2q16Pseudo_UPD:
|
||||||
|
ExpandVLD(MBBI, ARM::VLD2q16, true, SingleSpc, 4); break;
|
||||||
|
case ARM::VLD2q32Pseudo_UPD:
|
||||||
|
ExpandVLD(MBBI, ARM::VLD2q32, true, SingleSpc, 4); break;
|
||||||
|
|
||||||
|
case ARM::VLD1d64TPseudo:
|
||||||
|
ExpandVLD(MBBI, ARM::VLD1d64T, false, SingleSpc, 3); break;
|
||||||
|
case ARM::VLD1d64TPseudo_UPD:
|
||||||
|
ExpandVLD(MBBI, ARM::VLD1d64T, true, SingleSpc, 3); break;
|
||||||
|
|
||||||
|
case ARM::VLD1d64QPseudo:
|
||||||
|
ExpandVLD(MBBI, ARM::VLD1d64Q, false, SingleSpc, 4); break;
|
||||||
|
case ARM::VLD1d64QPseudo_UPD:
|
||||||
|
ExpandVLD(MBBI, ARM::VLD1d64Q, true, SingleSpc, 4); break;
|
||||||
|
|
||||||
case ARM::VST1q8Pseudo:
|
case ARM::VST1q8Pseudo:
|
||||||
ExpandVST(MBBI, ARM::VST1q8, false, SingleSpc, 2); break;
|
ExpandVST(MBBI, ARM::VST1q8, false, SingleSpc, 2); break;
|
||||||
case ARM::VST1q16Pseudo:
|
case ARM::VST1q16Pseudo:
|
||||||
|
@@ -1116,35 +1116,44 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
|
|||||||
if (is64BitVector) {
|
if (is64BitVector) {
|
||||||
unsigned Opc = DOpcodes[OpcodeIndex];
|
unsigned Opc = DOpcodes[OpcodeIndex];
|
||||||
const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain };
|
const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain };
|
||||||
std::vector<EVT> ResTys(NumVecs, VT);
|
SDNode *VLd;
|
||||||
ResTys.push_back(MVT::Other);
|
if (NumVecs <= 2) {
|
||||||
SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
|
EVT ResTy;
|
||||||
if (NumVecs < 2)
|
if (NumVecs == 1)
|
||||||
|
ResTy = VT;
|
||||||
|
else
|
||||||
|
ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, NumVecs);
|
||||||
|
VLd = CurDAG->getMachineNode(Opc, dl, ResTy, MVT::Other, Ops, 5);
|
||||||
|
} else {
|
||||||
|
std::vector<EVT> ResTys(NumVecs, VT);
|
||||||
|
ResTys.push_back(MVT::Other);
|
||||||
|
VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
|
||||||
|
}
|
||||||
|
if (NumVecs == 1)
|
||||||
return VLd;
|
return VLd;
|
||||||
|
|
||||||
SDValue RegSeq;
|
SDValue SuperReg;
|
||||||
SDValue V0 = SDValue(VLd, 0);
|
if (NumVecs <= 2)
|
||||||
SDValue V1 = SDValue(VLd, 1);
|
SuperReg = SDValue(VLd, 0);
|
||||||
|
|
||||||
// Form a REG_SEQUENCE to force register allocation.
|
|
||||||
if (NumVecs == 2)
|
|
||||||
RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
|
|
||||||
else {
|
else {
|
||||||
|
SDValue V0 = SDValue(VLd, 0);
|
||||||
|
SDValue V1 = SDValue(VLd, 1);
|
||||||
|
// Form a REG_SEQUENCE to force register allocation.
|
||||||
SDValue V2 = SDValue(VLd, 2);
|
SDValue V2 = SDValue(VLd, 2);
|
||||||
// If it's a vld3, form a quad D-register but discard the last part.
|
// If it's a vld3, form a quad D-register but discard the last part.
|
||||||
SDValue V3 = (NumVecs == 3)
|
SDValue V3 = (NumVecs == 3)
|
||||||
? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
|
? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
|
||||||
: SDValue(VLd, 3);
|
: SDValue(VLd, 3);
|
||||||
RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
|
SuperReg = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
|
assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
|
||||||
for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
|
for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
|
||||||
SDValue D = CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec,
|
SDValue D = CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec,
|
||||||
dl, VT, RegSeq);
|
dl, VT, SuperReg);
|
||||||
ReplaceUses(SDValue(N, Vec), D);
|
ReplaceUses(SDValue(N, Vec), D);
|
||||||
}
|
}
|
||||||
ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, NumVecs));
|
ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, NumVecs <= 2 ? 1 : NumVecs));
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1154,24 +1163,25 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
|
|||||||
// loading pairs of D regs.
|
// loading pairs of D regs.
|
||||||
unsigned Opc = QOpcodes0[OpcodeIndex];
|
unsigned Opc = QOpcodes0[OpcodeIndex];
|
||||||
const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain };
|
const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain };
|
||||||
std::vector<EVT> ResTys(2 * NumVecs, RegVT);
|
|
||||||
ResTys.push_back(MVT::Other);
|
EVT ResTy;
|
||||||
SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
|
if (NumVecs == 1)
|
||||||
Chain = SDValue(VLd, 2 * NumVecs);
|
ResTy = VT;
|
||||||
|
else
|
||||||
|
ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, 2 * NumVecs);
|
||||||
|
SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTy, MVT::Other, Ops, 5);
|
||||||
|
|
||||||
// Combine the even and odd subregs to produce the result.
|
// Combine the even and odd subregs to produce the result.
|
||||||
if (NumVecs == 1) {
|
if (NumVecs == 1)
|
||||||
SDNode *Q = PairDRegs(VT, SDValue(VLd, 0), SDValue(VLd, 1));
|
return VLd;
|
||||||
ReplaceUses(SDValue(N, 0), SDValue(Q, 0));
|
|
||||||
} else {
|
SDValue QQ = SDValue(VLd, 0);
|
||||||
SDValue QQ = SDValue(QuadDRegs(MVT::v4i64,
|
Chain = SDValue(VLd, 1);
|
||||||
SDValue(VLd, 0), SDValue(VLd, 1),
|
|
||||||
SDValue(VLd, 2), SDValue(VLd, 3)), 0);
|
SDValue Q0 = CurDAG->getTargetExtractSubreg(ARM::qsub_0, dl, VT, QQ);
|
||||||
SDValue Q0 = CurDAG->getTargetExtractSubreg(ARM::qsub_0, dl, VT, QQ);
|
SDValue Q1 = CurDAG->getTargetExtractSubreg(ARM::qsub_1, dl, VT, QQ);
|
||||||
SDValue Q1 = CurDAG->getTargetExtractSubreg(ARM::qsub_1, dl, VT, QQ);
|
ReplaceUses(SDValue(N, 0), Q0);
|
||||||
ReplaceUses(SDValue(N, 0), Q0);
|
ReplaceUses(SDValue(N, 1), Q1);
|
||||||
ReplaceUses(SDValue(N, 1), Q1);
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
// Otherwise, quad registers are loaded with two separate instructions,
|
// Otherwise, quad registers are loaded with two separate instructions,
|
||||||
// where one loads the even registers and the other loads the odd registers.
|
// where one loads the even registers and the other loads the odd registers.
|
||||||
@@ -2142,15 +2152,16 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
|
|||||||
case Intrinsic::arm_neon_vld1: {
|
case Intrinsic::arm_neon_vld1: {
|
||||||
unsigned DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
|
unsigned DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
|
||||||
ARM::VLD1d32, ARM::VLD1d64 };
|
ARM::VLD1d32, ARM::VLD1d64 };
|
||||||
unsigned QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
|
unsigned QOpcodes[] = { ARM::VLD1q8Pseudo, ARM::VLD1q16Pseudo,
|
||||||
ARM::VLD1q32, ARM::VLD1q64 };
|
ARM::VLD1q32Pseudo, ARM::VLD1q64Pseudo };
|
||||||
return SelectVLD(N, 1, DOpcodes, QOpcodes, 0);
|
return SelectVLD(N, 1, DOpcodes, QOpcodes, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
case Intrinsic::arm_neon_vld2: {
|
case Intrinsic::arm_neon_vld2: {
|
||||||
unsigned DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
|
unsigned DOpcodes[] = { ARM::VLD2d8Pseudo, ARM::VLD2d16Pseudo,
|
||||||
ARM::VLD2d32, ARM::VLD1q64 };
|
ARM::VLD2d32Pseudo, ARM::VLD1q64Pseudo };
|
||||||
unsigned QOpcodes[] = { ARM::VLD2q8, ARM::VLD2q16, ARM::VLD2q32 };
|
unsigned QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
|
||||||
|
ARM::VLD2q32Pseudo };
|
||||||
return SelectVLD(N, 2, DOpcodes, QOpcodes, 0);
|
return SelectVLD(N, 2, DOpcodes, QOpcodes, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -167,6 +167,21 @@ def VST1q
|
|||||||
|
|
||||||
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
|
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
|
||||||
|
|
||||||
|
// Classes for VLD* pseudo-instructions with multi-register operands.
|
||||||
|
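// These are expanded to real instructions after register allocation.
// NOTE(review): these load pseudo classes use the IIC_VST itinerary, which by
// its name looks like the store itinerary -- confirm whether a VLD itinerary
// is intended here.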
|
||||||
|
class VLDQPseudo
|
||||||
|
: PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), IIC_VST, "">;
|
||||||
|
class VLDQWBPseudo
|
||||||
|
: PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
|
||||||
|
(ins addrmode6:$addr, am6offset:$offset), IIC_VST,
|
||||||
|
"$addr.addr = $wb">;
|
||||||
|
class VLDQQPseudo
|
||||||
|
: PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), IIC_VST, "">;
|
||||||
|
class VLDQQWBPseudo
|
||||||
|
: PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
|
||||||
|
(ins addrmode6:$addr, am6offset:$offset), IIC_VST,
|
||||||
|
"$addr.addr = $wb">;
|
||||||
|
|
||||||
// VLD1 : Vector Load (multiple single elements)
|
// VLD1 : Vector Load (multiple single elements)
|
||||||
class VLD1D<bits<4> op7_4, string Dt>
|
class VLD1D<bits<4> op7_4, string Dt>
|
||||||
: NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst),
|
: NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst),
|
||||||
@@ -187,6 +202,11 @@ def VLD1q16 : VLD1Q<0b0100, "16">;
|
|||||||
def VLD1q32 : VLD1Q<0b1000, "32">;
|
def VLD1q32 : VLD1Q<0b1000, "32">;
|
||||||
def VLD1q64 : VLD1Q<0b1100, "64">;
|
def VLD1q64 : VLD1Q<0b1100, "64">;
|
||||||
|
|
||||||
|
def VLD1q8Pseudo : VLDQPseudo;
|
||||||
|
def VLD1q16Pseudo : VLDQPseudo;
|
||||||
|
def VLD1q32Pseudo : VLDQPseudo;
|
||||||
|
def VLD1q64Pseudo : VLDQPseudo;
|
||||||
|
|
||||||
// ...with address register writeback:
|
// ...with address register writeback:
|
||||||
class VLD1DWB<bits<4> op7_4, string Dt>
|
class VLD1DWB<bits<4> op7_4, string Dt>
|
||||||
: NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst, GPR:$wb),
|
: NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst, GPR:$wb),
|
||||||
@@ -209,6 +229,11 @@ def VLD1q16_UPD : VLD1QWB<0b0100, "16">;
|
|||||||
def VLD1q32_UPD : VLD1QWB<0b1000, "32">;
|
def VLD1q32_UPD : VLD1QWB<0b1000, "32">;
|
||||||
def VLD1q64_UPD : VLD1QWB<0b1100, "64">;
|
def VLD1q64_UPD : VLD1QWB<0b1100, "64">;
|
||||||
|
|
||||||
|
def VLD1q8Pseudo_UPD : VLDQWBPseudo;
|
||||||
|
def VLD1q16Pseudo_UPD : VLDQWBPseudo;
|
||||||
|
def VLD1q32Pseudo_UPD : VLDQWBPseudo;
|
||||||
|
def VLD1q64Pseudo_UPD : VLDQWBPseudo;
|
||||||
|
|
||||||
// ...with 3 registers (some of these are only for the disassembler):
|
// ...with 3 registers (some of these are only for the disassembler):
|
||||||
class VLD1D3<bits<4> op7_4, string Dt>
|
class VLD1D3<bits<4> op7_4, string Dt>
|
||||||
: NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
|
: NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
|
||||||
@@ -229,6 +254,9 @@ def VLD1d16T_UPD : VLD1D3WB<0b0100, "16">;
|
|||||||
def VLD1d32T_UPD : VLD1D3WB<0b1000, "32">;
|
def VLD1d32T_UPD : VLD1D3WB<0b1000, "32">;
|
||||||
def VLD1d64T_UPD : VLD1D3WB<0b1100, "64">;
|
def VLD1d64T_UPD : VLD1D3WB<0b1100, "64">;
|
||||||
|
|
||||||
|
def VLD1d64TPseudo : VLDQQPseudo;
|
||||||
|
def VLD1d64TPseudo_UPD : VLDQQWBPseudo;
|
||||||
|
|
||||||
// ...with 4 registers (some of these are only for the disassembler):
|
// ...with 4 registers (some of these are only for the disassembler):
|
||||||
class VLD1D4<bits<4> op7_4, string Dt>
|
class VLD1D4<bits<4> op7_4, string Dt>
|
||||||
: NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
|
: NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
|
||||||
@@ -251,6 +279,9 @@ def VLD1d16Q_UPD : VLD1D4WB<0b0100, "16">;
|
|||||||
def VLD1d32Q_UPD : VLD1D4WB<0b1000, "32">;
|
def VLD1d32Q_UPD : VLD1D4WB<0b1000, "32">;
|
||||||
def VLD1d64Q_UPD : VLD1D4WB<0b1100, "64">;
|
def VLD1d64Q_UPD : VLD1D4WB<0b1100, "64">;
|
||||||
|
|
||||||
|
def VLD1d64QPseudo : VLDQQPseudo;
|
||||||
|
def VLD1d64QPseudo_UPD : VLDQQWBPseudo;
|
||||||
|
|
||||||
// VLD2 : Vector Load (multiple 2-element structures)
|
// VLD2 : Vector Load (multiple 2-element structures)
|
||||||
class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt>
|
class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt>
|
||||||
: NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2),
|
: NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2),
|
||||||
@@ -270,6 +301,14 @@ def VLD2q8 : VLD2Q<0b0000, "8">;
|
|||||||
def VLD2q16 : VLD2Q<0b0100, "16">;
|
def VLD2q16 : VLD2Q<0b0100, "16">;
|
||||||
def VLD2q32 : VLD2Q<0b1000, "32">;
|
def VLD2q32 : VLD2Q<0b1000, "32">;
|
||||||
|
|
||||||
|
def VLD2d8Pseudo : VLDQPseudo;
|
||||||
|
def VLD2d16Pseudo : VLDQPseudo;
|
||||||
|
def VLD2d32Pseudo : VLDQPseudo;
|
||||||
|
|
||||||
|
def VLD2q8Pseudo : VLDQQPseudo;
|
||||||
|
def VLD2q16Pseudo : VLDQQPseudo;
|
||||||
|
def VLD2q32Pseudo : VLDQQPseudo;
|
||||||
|
|
||||||
// ...with address register writeback:
|
// ...with address register writeback:
|
||||||
class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
|
class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
|
||||||
: NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, GPR:$wb),
|
: NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, GPR:$wb),
|
||||||
@@ -291,6 +330,14 @@ def VLD2q8_UPD : VLD2QWB<0b0000, "8">;
|
|||||||
def VLD2q16_UPD : VLD2QWB<0b0100, "16">;
|
def VLD2q16_UPD : VLD2QWB<0b0100, "16">;
|
||||||
def VLD2q32_UPD : VLD2QWB<0b1000, "32">;
|
def VLD2q32_UPD : VLD2QWB<0b1000, "32">;
|
||||||
|
|
||||||
|
def VLD2d8Pseudo_UPD : VLDQWBPseudo;
|
||||||
|
def VLD2d16Pseudo_UPD : VLDQWBPseudo;
|
||||||
|
def VLD2d32Pseudo_UPD : VLDQWBPseudo;
|
||||||
|
|
||||||
|
def VLD2q8Pseudo_UPD : VLDQQWBPseudo;
|
||||||
|
def VLD2q16Pseudo_UPD : VLDQQWBPseudo;
|
||||||
|
def VLD2q32Pseudo_UPD : VLDQQWBPseudo;
|
||||||
|
|
||||||
// ...with double-spaced registers (for disassembly only):
|
// ...with double-spaced registers (for disassembly only):
|
||||||
def VLD2b8 : VLD2D<0b1001, 0b0000, "8">;
|
def VLD2b8 : VLD2D<0b1001, 0b0000, "8">;
|
||||||
def VLD2b16 : VLD2D<0b1001, 0b0100, "16">;
|
def VLD2b16 : VLD2D<0b1001, 0b0100, "16">;
|
||||||
@@ -531,10 +578,10 @@ def VST1q16 : VST1Q<0b0100, "16">;
|
|||||||
def VST1q32 : VST1Q<0b1000, "32">;
|
def VST1q32 : VST1Q<0b1000, "32">;
|
||||||
def VST1q64 : VST1Q<0b1100, "64">;
|
def VST1q64 : VST1Q<0b1100, "64">;
|
||||||
|
|
||||||
def VST1q8Pseudo : VSTQPseudo;
|
def VST1q8Pseudo : VSTQPseudo;
|
||||||
def VST1q16Pseudo : VSTQPseudo;
|
def VST1q16Pseudo : VSTQPseudo;
|
||||||
def VST1q32Pseudo : VSTQPseudo;
|
def VST1q32Pseudo : VSTQPseudo;
|
||||||
def VST1q64Pseudo : VSTQPseudo;
|
def VST1q64Pseudo : VSTQPseudo;
|
||||||
|
|
||||||
// ...with address register writeback:
|
// ...with address register writeback:
|
||||||
class VST1DWB<bits<4> op7_4, string Dt>
|
class VST1DWB<bits<4> op7_4, string Dt>
|
||||||
|
@@ -51,13 +51,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
|
|||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case ARM::VLD1q8:
|
|
||||||
case ARM::VLD1q16:
|
|
||||||
case ARM::VLD1q32:
|
|
||||||
case ARM::VLD1q64:
|
|
||||||
case ARM::VLD2d8:
|
|
||||||
case ARM::VLD2d16:
|
|
||||||
case ARM::VLD2d32:
|
|
||||||
case ARM::VLD2LNd8:
|
case ARM::VLD2LNd8:
|
||||||
case ARM::VLD2LNd16:
|
case ARM::VLD2LNd16:
|
||||||
case ARM::VLD2LNd32:
|
case ARM::VLD2LNd32:
|
||||||
@@ -65,13 +58,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
|
|||||||
NumRegs = 2;
|
NumRegs = 2;
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
case ARM::VLD2q8:
|
|
||||||
case ARM::VLD2q16:
|
|
||||||
case ARM::VLD2q32:
|
|
||||||
FirstOpnd = 0;
|
|
||||||
NumRegs = 4;
|
|
||||||
return true;
|
|
||||||
|
|
||||||
case ARM::VLD2LNq16:
|
case ARM::VLD2LNq16:
|
||||||
case ARM::VLD2LNq32:
|
case ARM::VLD2LNq32:
|
||||||
FirstOpnd = 0;
|
FirstOpnd = 0;
|
||||||
|
@@ -45,10 +45,10 @@ define void @t2(i16* %i_ptr, i16* %o_ptr, %struct.int16x8_t* nocapture %vT0ptr,
|
|||||||
entry:
|
entry:
|
||||||
; CHECK: t2:
|
; CHECK: t2:
|
||||||
; CHECK: vld1.16
|
; CHECK: vld1.16
|
||||||
; CHECK: vmul.i16
|
|
||||||
; CHECK-NOT: vmov
|
; CHECK-NOT: vmov
|
||||||
; CHECK: vld1.16
|
; CHECK: vld1.16
|
||||||
; CHECK: vmul.i16
|
; CHECK: vmul.i16
|
||||||
|
; CHECK: vmul.i16
|
||||||
; CHECK-NOT: vmov
|
; CHECK-NOT: vmov
|
||||||
; CHECK: vst1.16
|
; CHECK: vst1.16
|
||||||
; CHECK: vst1.16
|
; CHECK: vst1.16
|
||||||
|
Reference in New Issue
Block a user