mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-09-27 16:17:17 +00:00
Start converting NEON load/stores to use pseudo instructions, beginning here with the VST4 instructions. Until after register allocation, we want to represent sets of adjacent registers by a single super-register. These VST4 pseudo instructions have a single QQ or QQQQ source register operand. They get expanded to the real VST4 instructions with 4 separate D register operands. Once this conversion is complete, we'll be able to remove the NEONPreAllocPass and avoid some fragile and hacky code elsewhere.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@112108 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@@ -24,6 +24,13 @@ using namespace llvm;
|
|||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
class ARMExpandPseudo : public MachineFunctionPass {
|
class ARMExpandPseudo : public MachineFunctionPass {
|
||||||
|
// Constants for register spacing in NEON load/store instructions.
// They select which D subregisters of a super-register operand are used
// when a pseudo instruction is expanded (see ExpandVST4).
|
||||||
|
enum NEONRegSpacing {
|
||||||
|
SingleSpc,   // consecutive D subregisters: dsub_0, dsub_1, dsub_2, dsub_3
|
||||||
|
EvenDblSpc,  // even D subregisters: dsub_0, dsub_2, dsub_4, dsub_6
|
||||||
|
OddDblSpc    // odd D subregisters: dsub_1, dsub_3, dsub_5, dsub_7
|
||||||
|
};
|
||||||
|
|
||||||
public:
|
public:
|
||||||
static char ID;
|
static char ID;
|
||||||
ARMExpandPseudo() : MachineFunctionPass(ID) {}
|
ARMExpandPseudo() : MachineFunctionPass(ID) {}
|
||||||
@@ -41,6 +48,8 @@ namespace {
|
|||||||
void TransferImpOps(MachineInstr &OldMI,
|
void TransferImpOps(MachineInstr &OldMI,
|
||||||
MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI);
|
MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI);
|
||||||
bool ExpandMBB(MachineBasicBlock &MBB);
|
bool ExpandMBB(MachineBasicBlock &MBB);
|
||||||
|
void ExpandVST4(MachineBasicBlock::iterator &MBBI, unsigned Opc,
|
||||||
|
bool hasWriteBack, NEONRegSpacing RegSpc);
|
||||||
};
|
};
|
||||||
char ARMExpandPseudo::ID = 0;
|
char ARMExpandPseudo::ID = 0;
|
||||||
}
|
}
|
||||||
@@ -63,6 +72,61 @@ void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// ExpandVST4 - Translate VST4 pseudo instructions with QQ or QQQQ register
|
||||||
|
/// operands to real VST4 instructions with 4 D register operands.
///
/// MBBI refers to the pseudo instruction; it is erased before returning.
/// Opc is the opcode of the real instruction that is built in its place.
/// hasWriteBack indicates that the pseudo starts with a def register operand
/// and carries an am6offset register operand after the addrmode6 operands;
/// both are copied to the new instruction.
/// RegSpc selects which four D subregisters of the source super-register are
/// stored: dsub_0..dsub_3 (SingleSpc), dsub_0/2/4/6 (EvenDblSpc) or
/// dsub_1/3/5/7 (OddDblSpc).
|
||||||
|
void ARMExpandPseudo::ExpandVST4(MachineBasicBlock::iterator &MBBI,
|
||||||
|
unsigned Opc, bool hasWriteBack,
|
||||||
|
NEONRegSpacing RegSpc) {
|
||||||
|
MachineInstr &MI = *MBBI;
|
||||||
|
MachineBasicBlock &MBB = *MI.getParent();
|
||||||
|
|
||||||
|
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc));
|
||||||
|
unsigned OpIdx = 0;  // index of the next pseudo operand to copy
|
||||||
|
if (hasWriteBack) {  // copy the leading def operand, keeping its dead flag
|
||||||
|
bool DstIsDead = MI.getOperand(OpIdx).isDead();
|
||||||
|
unsigned DstReg = MI.getOperand(OpIdx++).getReg();
|
||||||
|
MIB.addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead));
|
||||||
|
}
|
||||||
|
// Copy the addrmode6 operands.
// (A register, with its kill flag preserved, followed by an immediate.)
|
||||||
|
bool AddrIsKill = MI.getOperand(OpIdx).isKill();
|
||||||
|
MIB.addReg(MI.getOperand(OpIdx++).getReg(), getKillRegState(AddrIsKill));
|
||||||
|
MIB.addImm(MI.getOperand(OpIdx++).getImm());
|
||||||
|
if (hasWriteBack) {
|
||||||
|
// Copy the am6offset operand.
|
||||||
|
bool OffsetIsKill = MI.getOperand(OpIdx).isKill();
|
||||||
|
MIB.addReg(MI.getOperand(OpIdx++).getReg(), getKillRegState(OffsetIsKill));
|
||||||
|
}
|
||||||
|
|
||||||
|
bool SrcIsKill = MI.getOperand(OpIdx).isKill();  // OpIdx now names the super-register source
|
||||||
|
unsigned SrcReg = MI.getOperand(OpIdx).getReg();
|
||||||
|
unsigned D0, D1, D2, D3;  // the four D subregisters to store
|
||||||
|
if (RegSpc == SingleSpc) {  // consecutive subregisters
|
||||||
|
D0 = TRI->getSubReg(SrcReg, ARM::dsub_0);
|
||||||
|
D1 = TRI->getSubReg(SrcReg, ARM::dsub_1);
|
||||||
|
D2 = TRI->getSubReg(SrcReg, ARM::dsub_2);
|
||||||
|
D3 = TRI->getSubReg(SrcReg, ARM::dsub_3);
|
||||||
|
} else if (RegSpc == EvenDblSpc) {  // every other subregister, starting at dsub_0
|
||||||
|
D0 = TRI->getSubReg(SrcReg, ARM::dsub_0);
|
||||||
|
D1 = TRI->getSubReg(SrcReg, ARM::dsub_2);
|
||||||
|
D2 = TRI->getSubReg(SrcReg, ARM::dsub_4);
|
||||||
|
D3 = TRI->getSubReg(SrcReg, ARM::dsub_6);
|
||||||
|
} else {  // every other subregister, starting at dsub_1
|
||||||
|
assert(RegSpc == OddDblSpc && "unknown register spacing for VST4");
|
||||||
|
D0 = TRI->getSubReg(SrcReg, ARM::dsub_1);
|
||||||
|
D1 = TRI->getSubReg(SrcReg, ARM::dsub_3);
|
||||||
|
D2 = TRI->getSubReg(SrcReg, ARM::dsub_5);
|
||||||
|
D3 = TRI->getSubReg(SrcReg, ARM::dsub_7);
|
||||||
|
}
|
||||||
|
|
||||||
|
MIB.addReg(D0, getKillRegState(SrcIsKill))  // the source's kill flag is applied to every D operand
|
||||||
|
.addReg(D1, getKillRegState(SrcIsKill))
|
||||||
|
.addReg(D2, getKillRegState(SrcIsKill))
|
||||||
|
.addReg(D3, getKillRegState(SrcIsKill));
|
||||||
|
MIB = AddDefaultPred(MIB);  // NOTE(review): the pseudo's own predicate operands are not read here - confirm intended
|
||||||
|
TransferImpOps(MI, MIB, MIB);  // the single new instruction serves as both the use and the def builder
|
||||||
|
MI.eraseFromParent();  // remove the pseudo; ExpandMBB continues from the next iterator it saved beforehand
|
||||||
|
}
|
||||||
|
|
||||||
bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
|
bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
|
||||||
bool Modified = false;
|
bool Modified = false;
|
||||||
|
|
||||||
@@ -71,9 +135,13 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
|
|||||||
MachineInstr &MI = *MBBI;
|
MachineInstr &MI = *MBBI;
|
||||||
MachineBasicBlock::iterator NMBBI = llvm::next(MBBI);
|
MachineBasicBlock::iterator NMBBI = llvm::next(MBBI);
|
||||||
|
|
||||||
|
bool ModifiedOp = true;
|
||||||
unsigned Opcode = MI.getOpcode();
|
unsigned Opcode = MI.getOpcode();
|
||||||
switch (Opcode) {
|
switch (Opcode) {
|
||||||
default: break;
|
default:
|
||||||
|
ModifiedOp = false;
|
||||||
|
break;
|
||||||
|
|
||||||
case ARM::tLDRpci_pic:
|
case ARM::tLDRpci_pic:
|
||||||
case ARM::t2LDRpci_pic: {
|
case ARM::t2LDRpci_pic: {
|
||||||
unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic)
|
unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic)
|
||||||
@@ -92,7 +160,6 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
|
|||||||
.addOperand(MI.getOperand(2));
|
.addOperand(MI.getOperand(2));
|
||||||
TransferImpOps(MI, MIB1, MIB2);
|
TransferImpOps(MI, MIB1, MIB2);
|
||||||
MI.eraseFromParent();
|
MI.eraseFromParent();
|
||||||
Modified = true;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -128,7 +195,6 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
|
|||||||
HI16.addImm(Pred).addReg(PredReg);
|
HI16.addImm(Pred).addReg(PredReg);
|
||||||
TransferImpOps(MI, LO16, HI16);
|
TransferImpOps(MI, LO16, HI16);
|
||||||
MI.eraseFromParent();
|
MI.eraseFromParent();
|
||||||
Modified = true;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -155,9 +221,37 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
|
|||||||
.addReg(OddSrc, getKillRegState(SrcIsKill)));
|
.addReg(OddSrc, getKillRegState(SrcIsKill)));
|
||||||
TransferImpOps(MI, Even, Odd);
|
TransferImpOps(MI, Even, Odd);
|
||||||
MI.eraseFromParent();
|
MI.eraseFromParent();
|
||||||
|
}
|
||||||
|
|
||||||
|
case ARM::VST4d8Pseudo:
|
||||||
|
ExpandVST4(MBBI, ARM::VST4d8, false, SingleSpc); break;
|
||||||
|
case ARM::VST4d16Pseudo:
|
||||||
|
ExpandVST4(MBBI, ARM::VST4d16, false, SingleSpc); break;
|
||||||
|
case ARM::VST4d32Pseudo:
|
||||||
|
ExpandVST4(MBBI, ARM::VST4d32, false, SingleSpc); break;
|
||||||
|
case ARM::VST4d8Pseudo_UPD:
|
||||||
|
ExpandVST4(MBBI, ARM::VST4d8_UPD, true, SingleSpc); break;
|
||||||
|
case ARM::VST4d16Pseudo_UPD:
|
||||||
|
ExpandVST4(MBBI, ARM::VST4d16_UPD, true, SingleSpc); break;
|
||||||
|
case ARM::VST4d32Pseudo_UPD:
|
||||||
|
ExpandVST4(MBBI, ARM::VST4d32_UPD, true, SingleSpc); break;
|
||||||
|
case ARM::VST4q8Pseudo_UPD:
|
||||||
|
ExpandVST4(MBBI, ARM::VST4q8_UPD, true, EvenDblSpc); break;
|
||||||
|
case ARM::VST4q16Pseudo_UPD:
|
||||||
|
ExpandVST4(MBBI, ARM::VST4q16_UPD, true, EvenDblSpc); break;
|
||||||
|
case ARM::VST4q32Pseudo_UPD:
|
||||||
|
ExpandVST4(MBBI, ARM::VST4q32_UPD, true, EvenDblSpc); break;
|
||||||
|
case ARM::VST4q8oddPseudo_UPD:
|
||||||
|
ExpandVST4(MBBI, ARM::VST4q8_UPD, true, OddDblSpc); break;
|
||||||
|
case ARM::VST4q16oddPseudo_UPD:
|
||||||
|
ExpandVST4(MBBI, ARM::VST4q16_UPD, true, OddDblSpc); break;
|
||||||
|
case ARM::VST4q32oddPseudo_UPD:
|
||||||
|
ExpandVST4(MBBI, ARM::VST4q32_UPD, true, OddDblSpc); break;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ModifiedOp)
|
||||||
Modified = true;
|
Modified = true;
|
||||||
}
|
|
||||||
}
|
|
||||||
MBBI = NMBBI;
|
MBBI = NMBBI;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -1260,6 +1260,11 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
|
|||||||
Ops.push_back(MemAddr);
|
Ops.push_back(MemAddr);
|
||||||
Ops.push_back(Align);
|
Ops.push_back(Align);
|
||||||
|
|
||||||
|
// FIXME: This is a temporary flag to distinguish VSTs that have been
|
||||||
|
// converted to pseudo instructions.
|
||||||
|
bool usePseudoInstrs = (NumVecs == 4 &&
|
||||||
|
VT.getSimpleVT().SimpleTy != MVT::v1i64);
|
||||||
|
|
||||||
if (is64BitVector) {
|
if (is64BitVector) {
|
||||||
if (NumVecs >= 2) {
|
if (NumVecs >= 2) {
|
||||||
SDValue RegSeq;
|
SDValue RegSeq;
|
||||||
@@ -1278,6 +1283,9 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
|
|||||||
: N->getOperand(3+3);
|
: N->getOperand(3+3);
|
||||||
RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
|
RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
|
||||||
}
|
}
|
||||||
|
if (usePseudoInstrs)
|
||||||
|
Ops.push_back(RegSeq);
|
||||||
|
else {
|
||||||
|
|
||||||
// Now extract the D registers back out.
|
// Now extract the D registers back out.
|
||||||
Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT,
|
Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT,
|
||||||
@@ -1290,15 +1298,16 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
|
|||||||
if (NumVecs > 3)
|
if (NumVecs > 3)
|
||||||
Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT,
|
Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT,
|
||||||
RegSeq));
|
RegSeq));
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
|
Ops.push_back(N->getOperand(3));
|
||||||
Ops.push_back(N->getOperand(Vec+3));
|
|
||||||
}
|
}
|
||||||
Ops.push_back(Pred);
|
Ops.push_back(Pred);
|
||||||
Ops.push_back(Reg0); // predicate register
|
Ops.push_back(Reg0); // predicate register
|
||||||
Ops.push_back(Chain);
|
Ops.push_back(Chain);
|
||||||
unsigned Opc = DOpcodes[OpcodeIndex];
|
unsigned Opc = DOpcodes[OpcodeIndex];
|
||||||
return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+5);
|
return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(),
|
||||||
|
usePseudoInstrs ? 6 : NumVecs+5);
|
||||||
}
|
}
|
||||||
|
|
||||||
EVT RegVT = GetNEONSubregVT(VT);
|
EVT RegVT = GetNEONSubregVT(VT);
|
||||||
@@ -1363,6 +1372,9 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
|
|||||||
// Store the even D registers.
|
// Store the even D registers.
|
||||||
assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
|
assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
|
||||||
Ops.push_back(Reg0); // post-access address offset
|
Ops.push_back(Reg0); // post-access address offset
|
||||||
|
if (usePseudoInstrs)
|
||||||
|
Ops.push_back(RegSeq);
|
||||||
|
else
|
||||||
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
|
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
|
||||||
Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec*2, dl,
|
Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec*2, dl,
|
||||||
RegVT, RegSeq));
|
RegVT, RegSeq));
|
||||||
@@ -1371,18 +1383,24 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
|
|||||||
Ops.push_back(Chain);
|
Ops.push_back(Chain);
|
||||||
unsigned Opc = QOpcodes0[OpcodeIndex];
|
unsigned Opc = QOpcodes0[OpcodeIndex];
|
||||||
SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
|
SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
|
||||||
MVT::Other, Ops.data(), NumVecs+6);
|
MVT::Other, Ops.data(),
|
||||||
|
usePseudoInstrs ? 7 : NumVecs+6);
|
||||||
Chain = SDValue(VStA, 1);
|
Chain = SDValue(VStA, 1);
|
||||||
|
|
||||||
// Store the odd D registers.
|
// Store the odd D registers.
|
||||||
Ops[0] = SDValue(VStA, 0); // MemAddr
|
Ops[0] = SDValue(VStA, 0); // MemAddr
|
||||||
|
if (usePseudoInstrs)
|
||||||
|
Ops[6] = Chain;
|
||||||
|
else {
|
||||||
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
|
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
|
||||||
Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::dsub_1+Vec*2, dl,
|
Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::dsub_1+Vec*2, dl,
|
||||||
RegVT, RegSeq);
|
RegVT, RegSeq);
|
||||||
Ops[NumVecs+5] = Chain;
|
Ops[NumVecs+5] = Chain;
|
||||||
|
}
|
||||||
Opc = QOpcodes1[OpcodeIndex];
|
Opc = QOpcodes1[OpcodeIndex];
|
||||||
SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
|
SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
|
||||||
MVT::Other, Ops.data(), NumVecs+6);
|
MVT::Other, Ops.data(),
|
||||||
|
usePseudoInstrs ? 7 : NumVecs+6);
|
||||||
Chain = SDValue(VStB, 1);
|
Chain = SDValue(VStB, 1);
|
||||||
ReplaceUses(SDValue(N, 0), Chain);
|
ReplaceUses(SDValue(N, 0), Chain);
|
||||||
return NULL;
|
return NULL;
|
||||||
@@ -2312,14 +2330,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
case Intrinsic::arm_neon_vst4: {
|
case Intrinsic::arm_neon_vst4: {
|
||||||
unsigned DOpcodes[] = { ARM::VST4d8, ARM::VST4d16,
|
unsigned DOpcodes[] = { ARM::VST4d8Pseudo, ARM::VST4d16Pseudo,
|
||||||
ARM::VST4d32, ARM::VST1d64Q };
|
ARM::VST4d32Pseudo, ARM::VST1d64Q };
|
||||||
unsigned QOpcodes0[] = { ARM::VST4q8_UPD,
|
unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
|
||||||
ARM::VST4q16_UPD,
|
ARM::VST4q16Pseudo_UPD,
|
||||||
ARM::VST4q32_UPD };
|
ARM::VST4q32Pseudo_UPD };
|
||||||
unsigned QOpcodes1[] = { ARM::VST4q8odd_UPD,
|
unsigned QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
|
||||||
ARM::VST4q16odd_UPD,
|
ARM::VST4q16oddPseudo_UPD,
|
||||||
ARM::VST4q32odd_UPD };
|
ARM::VST4q32oddPseudo_UPD };
|
||||||
return SelectVST(N, 4, DOpcodes, QOpcodes0, QOpcodes1);
|
return SelectVST(N, 4, DOpcodes, QOpcodes0, QOpcodes1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -1534,6 +1534,14 @@ class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4,
|
|||||||
let Inst{7-4} = op7_4;
|
let Inst{7-4} = op7_4;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Base class for NEON load/store pseudo-instructions. It derives from InstARM
// with AddrMode6 and the Pseudo format, takes the output and input operand
// lists, itinerary and constraint string from its arguments, and requires
// the HasNEON predicate.
class PseudoNLdSt<dag oops, dag iops, InstrItinClass itin, string cstr>
|
||||||
|
: InstARM<AddrMode6, Size4Bytes, IndexModeNone, Pseudo, NeonDomain, cstr,
|
||||||
|
itin> {
|
||||||
|
let OutOperandList = oops;
|
||||||
|
let InOperandList = !con(iops, (ins pred:$p));  // append the predicate operand to the given inputs
|
||||||
|
list<Predicate> Predicates = [HasNEON];
|
||||||
|
}
|
||||||
|
|
||||||
class NDataI<dag oops, dag iops, Format f, InstrItinClass itin,
|
class NDataI<dag oops, dag iops, Format f, InstrItinClass itin,
|
||||||
string opc, string dt, string asm, string cstr, list<dag> pattern>
|
string opc, string dt, string asm, string cstr, list<dag> pattern>
|
||||||
: NeonI<oops, iops, AddrModeNone, IndexModeNone, f, itin, opc, dt, asm, cstr,
|
: NeonI<oops, iops, AddrModeNone, IndexModeNone, f, itin, opc, dt, asm, cstr,
|
||||||
|
@@ -486,6 +486,19 @@ def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32">;
|
|||||||
|
|
||||||
let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
|
let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
|
||||||
|
|
||||||
|
// Classes for VST* pseudo-instructions with multi-register operands.
|
||||||
|
// These are expanded to real instructions after register allocation.
// All three use the IIC_VST itinerary. The "WB" variants add a GPR $wb
// output and an am6offset input, and pass the constraint string
// "$addr.addr = $wb".
|
||||||
|
class VSTQQPseudo  // no outputs; stores a QQPR source
|
||||||
|
: PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), IIC_VST, "">;
|
||||||
|
class VSTQQWBPseudo  // writeback form with a QQPR source
|
||||||
|
: PseudoNLdSt<(outs GPR:$wb),
|
||||||
|
(ins addrmode6:$addr, am6offset:$offset, QQPR:$src), IIC_VST,
|
||||||
|
"$addr.addr = $wb">;
|
||||||
|
class VSTQQQQWBPseudo  // writeback form with a QQQQPR source
|
||||||
|
: PseudoNLdSt<(outs GPR:$wb),
|
||||||
|
(ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), IIC_VST,
|
||||||
|
"$addr.addr = $wb">;
|
||||||
|
|
||||||
// VST1 : Vector Store (multiple single elements)
|
// VST1 : Vector Store (multiple single elements)
|
||||||
class VST1D<bits<4> op7_4, string Dt>
|
class VST1D<bits<4> op7_4, string Dt>
|
||||||
: NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$addr, DPR:$src), IIC_VST,
|
: NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$addr, DPR:$src), IIC_VST,
|
||||||
@@ -664,6 +677,10 @@ def VST4d8 : VST4D<0b0000, 0b0000, "8">;
|
|||||||
def VST4d16 : VST4D<0b0000, 0b0100, "16">;
|
def VST4d16 : VST4D<0b0000, 0b0100, "16">;
|
||||||
def VST4d32 : VST4D<0b0000, 0b1000, "32">;
|
def VST4d32 : VST4D<0b0000, 0b1000, "32">;
|
||||||
|
|
||||||
|
def VST4d8Pseudo : VSTQQPseudo;
|
||||||
|
def VST4d16Pseudo : VSTQQPseudo;
|
||||||
|
def VST4d32Pseudo : VSTQQPseudo;
|
||||||
|
|
||||||
// ...with address register writeback:
|
// ...with address register writeback:
|
||||||
class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
|
class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
|
||||||
: NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
|
: NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
|
||||||
@@ -676,6 +693,10 @@ def VST4d8_UPD : VST4DWB<0b0000, 0b0000, "8">;
|
|||||||
def VST4d16_UPD : VST4DWB<0b0000, 0b0100, "16">;
|
def VST4d16_UPD : VST4DWB<0b0000, 0b0100, "16">;
|
||||||
def VST4d32_UPD : VST4DWB<0b0000, 0b1000, "32">;
|
def VST4d32_UPD : VST4DWB<0b0000, 0b1000, "32">;
|
||||||
|
|
||||||
|
def VST4d8Pseudo_UPD : VSTQQWBPseudo;
|
||||||
|
def VST4d16Pseudo_UPD : VSTQQWBPseudo;
|
||||||
|
def VST4d32Pseudo_UPD : VSTQQWBPseudo;
|
||||||
|
|
||||||
// ...with double-spaced registers (non-updating versions for disassembly only):
|
// ...with double-spaced registers (non-updating versions for disassembly only):
|
||||||
def VST4q8 : VST4D<0b0001, 0b0000, "8">;
|
def VST4q8 : VST4D<0b0001, 0b0000, "8">;
|
||||||
def VST4q16 : VST4D<0b0001, 0b0100, "16">;
|
def VST4q16 : VST4D<0b0001, 0b0100, "16">;
|
||||||
@@ -684,10 +705,14 @@ def VST4q8_UPD : VST4DWB<0b0001, 0b0000, "8">;
|
|||||||
def VST4q16_UPD : VST4DWB<0b0001, 0b0100, "16">;
|
def VST4q16_UPD : VST4DWB<0b0001, 0b0100, "16">;
|
||||||
def VST4q32_UPD : VST4DWB<0b0001, 0b1000, "32">;
|
def VST4q32_UPD : VST4DWB<0b0001, 0b1000, "32">;
|
||||||
|
|
||||||
|
def VST4q8Pseudo_UPD : VSTQQQQWBPseudo;
|
||||||
|
def VST4q16Pseudo_UPD : VSTQQQQWBPseudo;
|
||||||
|
def VST4q32Pseudo_UPD : VSTQQQQWBPseudo;
|
||||||
|
|
||||||
// ...alternate versions to be allocated odd register numbers:
|
// ...alternate versions to be allocated odd register numbers:
|
||||||
def VST4q8odd_UPD : VST4DWB<0b0001, 0b0000, "8">;
|
def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo;
|
||||||
def VST4q16odd_UPD : VST4DWB<0b0001, 0b0100, "16">;
|
def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo;
|
||||||
def VST4q32odd_UPD : VST4DWB<0b0001, 0b1000, "32">;
|
def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo;
|
||||||
|
|
||||||
// VST1LN : Vector Store (single element from one lane)
|
// VST1LN : Vector Store (single element from one lane)
|
||||||
// FIXME: Not yet implemented.
|
// FIXME: Not yet implemented.
|
||||||
|
@@ -260,9 +260,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
|
|||||||
Stride = 2;
|
Stride = 2;
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
case ARM::VST4d8:
|
|
||||||
case ARM::VST4d16:
|
|
||||||
case ARM::VST4d32:
|
|
||||||
case ARM::VST1d64Q:
|
case ARM::VST1d64Q:
|
||||||
case ARM::VST4LNd8:
|
case ARM::VST4LNd8:
|
||||||
case ARM::VST4LNd16:
|
case ARM::VST4LNd16:
|
||||||
@@ -271,24 +268,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
|
|||||||
NumRegs = 4;
|
NumRegs = 4;
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
case ARM::VST4q8_UPD:
|
|
||||||
case ARM::VST4q16_UPD:
|
|
||||||
case ARM::VST4q32_UPD:
|
|
||||||
FirstOpnd = 4;
|
|
||||||
NumRegs = 4;
|
|
||||||
Offset = 0;
|
|
||||||
Stride = 2;
|
|
||||||
return true;
|
|
||||||
|
|
||||||
case ARM::VST4q8odd_UPD:
|
|
||||||
case ARM::VST4q16odd_UPD:
|
|
||||||
case ARM::VST4q32odd_UPD:
|
|
||||||
FirstOpnd = 4;
|
|
||||||
NumRegs = 4;
|
|
||||||
Offset = 1;
|
|
||||||
Stride = 2;
|
|
||||||
return true;
|
|
||||||
|
|
||||||
case ARM::VST4LNq16:
|
case ARM::VST4LNq16:
|
||||||
case ARM::VST4LNq32:
|
case ARM::VST4LNq32:
|
||||||
FirstOpnd = 2;
|
FirstOpnd = 2;
|
||||||
|
Reference in New Issue
Block a user