mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-04-03 02:31:26 +00:00
Start converting NEON load/stores to use pseudo instructions, beginning here with the VST4 instructions.
Until after register allocation, we want to represent sets of adjacent registers by a single super-register. These VST4 pseudo instructions have a single QQ or QQQQ source register operand. They get expanded to the real VST4 instructions with 4 separate D register operands. Once this conversion is complete, we'll be able to remove the NEONPreAllocPass and avoid some fragile and hacky code elsewhere.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@112108 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
5b5f7260a0
commit
709d59255a
@ -24,6 +24,13 @@ using namespace llvm;
|
||||
|
||||
namespace {
|
||||
class ARMExpandPseudo : public MachineFunctionPass {
|
||||
// Constants for register spacing in NEON load/store instructions.
// ExpandVST4 uses the value to decide which D sub-registers of the source
// super-register become the operands of the real instruction.
|
||||
enum NEONRegSpacing {
|
||||
// Consecutive D sub-registers: dsub_0, dsub_1, dsub_2, dsub_3.
SingleSpc,
|
||||
// Every second D sub-register, starting at dsub_0: dsub_0, 2, 4, 6.
EvenDblSpc,
|
||||
// Every second D sub-register, starting at dsub_1: dsub_1, 3, 5, 7.
OddDblSpc
|
||||
};
|
||||
|
||||
public:
|
||||
static char ID;
|
||||
ARMExpandPseudo() : MachineFunctionPass(ID) {}
|
||||
@ -41,6 +48,8 @@ namespace {
|
||||
void TransferImpOps(MachineInstr &OldMI,
|
||||
MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI);
|
||||
bool ExpandMBB(MachineBasicBlock &MBB);
|
||||
void ExpandVST4(MachineBasicBlock::iterator &MBBI, unsigned Opc,
|
||||
bool hasWriteBack, NEONRegSpacing RegSpc);
|
||||
};
|
||||
char ARMExpandPseudo::ID = 0;
|
||||
}
|
||||
@ -63,6 +72,61 @@ void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI,
|
||||
}
|
||||
}
|
||||
|
||||
/// ExpandVST4 - Translate VST4 pseudo instructions with QQ or QQQQ register
|
||||
/// operands to real VST4 instructions with 4 D register operands.
///
/// \param MBBI          iterator at the pseudo instruction to expand.
/// \param Opc           opcode of the real instruction built in its place.
/// \param hasWriteBack  true when the pseudo starts with a writeback def and
///                      carries an am6offset operand after the address.
/// \param RegSpc        selects which D sub-registers of the source
///                      super-register are passed to the real instruction.
///
/// The pseudo instruction is erased from its block before returning.
|
||||
void ARMExpandPseudo::ExpandVST4(MachineBasicBlock::iterator &MBBI,
|
||||
unsigned Opc, bool hasWriteBack,
|
||||
NEONRegSpacing RegSpc) {
|
||||
MachineInstr &MI = *MBBI;
|
||||
MachineBasicBlock &MBB = *MI.getParent();
|
||||
|
||||
// Create the replacement instruction at the pseudo's position, reusing its
// debug location. Operands are appended to it one by one below.
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc));
|
||||
// OpIdx walks the pseudo's operands in order as they are copied.
unsigned OpIdx = 0;
|
||||
if (hasWriteBack) {
|
||||
// The writeback result is the pseudo's first operand; re-emit it as a def
// and keep its dead flag.
bool DstIsDead = MI.getOperand(OpIdx).isDead();
|
||||
unsigned DstReg = MI.getOperand(OpIdx++).getReg();
|
||||
MIB.addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead));
|
||||
}
|
||||
// Copy the addrmode6 operands.
// These are a register (with its kill flag) followed by an immediate.
|
||||
bool AddrIsKill = MI.getOperand(OpIdx).isKill();
|
||||
MIB.addReg(MI.getOperand(OpIdx++).getReg(), getKillRegState(AddrIsKill));
|
||||
MIB.addImm(MI.getOperand(OpIdx++).getImm());
|
||||
if (hasWriteBack) {
|
||||
// Copy the am6offset operand.
|
||||
bool OffsetIsKill = MI.getOperand(OpIdx).isKill();
|
||||
MIB.addReg(MI.getOperand(OpIdx++).getReg(), getKillRegState(OffsetIsKill));
|
||||
}
|
||||
|
||||
// OpIdx now refers to the source super-register operand. It is read but not
// advanced past, so no later operand of the pseudo is visited here.
bool SrcIsKill = MI.getOperand(OpIdx).isKill();
|
||||
unsigned SrcReg = MI.getOperand(OpIdx).getReg();
|
||||
// Pick the four D sub-registers of SrcReg according to the spacing.
unsigned D0, D1, D2, D3;
|
||||
if (RegSpc == SingleSpc) {
|
||||
D0 = TRI->getSubReg(SrcReg, ARM::dsub_0);
|
||||
D1 = TRI->getSubReg(SrcReg, ARM::dsub_1);
|
||||
D2 = TRI->getSubReg(SrcReg, ARM::dsub_2);
|
||||
D3 = TRI->getSubReg(SrcReg, ARM::dsub_3);
|
||||
} else if (RegSpc == EvenDblSpc) {
|
||||
D0 = TRI->getSubReg(SrcReg, ARM::dsub_0);
|
||||
D1 = TRI->getSubReg(SrcReg, ARM::dsub_2);
|
||||
D2 = TRI->getSubReg(SrcReg, ARM::dsub_4);
|
||||
D3 = TRI->getSubReg(SrcReg, ARM::dsub_6);
|
||||
} else {
|
||||
assert(RegSpc == OddDblSpc && "unknown register spacing for VST4");
|
||||
D0 = TRI->getSubReg(SrcReg, ARM::dsub_1);
|
||||
D1 = TRI->getSubReg(SrcReg, ARM::dsub_3);
|
||||
D2 = TRI->getSubReg(SrcReg, ARM::dsub_5);
|
||||
D3 = TRI->getSubReg(SrcReg, ARM::dsub_7);
|
||||
}
|
||||
|
||||
// Each D sub-register operand inherits the kill flag of the super-register
// operand it was taken from.
MIB.addReg(D0, getKillRegState(SrcIsKill))
|
||||
.addReg(D1, getKillRegState(SrcIsKill))
|
||||
.addReg(D2, getKillRegState(SrcIsKill))
|
||||
.addReg(D3, getKillRegState(SrcIsKill));
|
||||
// NOTE(review): a default predicate is appended here; this function never
// reads the pseudo's explicit operands after the source register, so any
// predicate on the pseudo is not copied by the visible code - confirm the
// pseudo can never carry a non-default predicate at this point.
MIB = AddDefaultPred(MIB);
|
||||
// The same builder is passed for both the use and def targets because only
// one real instruction replaces the pseudo.
TransferImpOps(MI, MIB, MIB);
|
||||
MI.eraseFromParent();
|
||||
}
|
||||
|
||||
bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
|
||||
bool Modified = false;
|
||||
|
||||
@ -71,9 +135,13 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
|
||||
MachineInstr &MI = *MBBI;
|
||||
MachineBasicBlock::iterator NMBBI = llvm::next(MBBI);
|
||||
|
||||
bool ModifiedOp = true;
|
||||
unsigned Opcode = MI.getOpcode();
|
||||
switch (Opcode) {
|
||||
default: break;
|
||||
default:
|
||||
ModifiedOp = false;
|
||||
break;
|
||||
|
||||
case ARM::tLDRpci_pic:
|
||||
case ARM::t2LDRpci_pic: {
|
||||
unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic)
|
||||
@ -92,7 +160,6 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
|
||||
.addOperand(MI.getOperand(2));
|
||||
TransferImpOps(MI, MIB1, MIB2);
|
||||
MI.eraseFromParent();
|
||||
Modified = true;
|
||||
break;
|
||||
}
|
||||
|
||||
@ -128,7 +195,6 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
|
||||
HI16.addImm(Pred).addReg(PredReg);
|
||||
TransferImpOps(MI, LO16, HI16);
|
||||
MI.eraseFromParent();
|
||||
Modified = true;
|
||||
break;
|
||||
}
|
||||
|
||||
@ -155,9 +221,37 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
|
||||
.addReg(OddSrc, getKillRegState(SrcIsKill)));
|
||||
TransferImpOps(MI, Even, Odd);
|
||||
MI.eraseFromParent();
|
||||
}
|
||||
|
||||
case ARM::VST4d8Pseudo:
|
||||
ExpandVST4(MBBI, ARM::VST4d8, false, SingleSpc); break;
|
||||
case ARM::VST4d16Pseudo:
|
||||
ExpandVST4(MBBI, ARM::VST4d16, false, SingleSpc); break;
|
||||
case ARM::VST4d32Pseudo:
|
||||
ExpandVST4(MBBI, ARM::VST4d32, false, SingleSpc); break;
|
||||
case ARM::VST4d8Pseudo_UPD:
|
||||
ExpandVST4(MBBI, ARM::VST4d8_UPD, true, SingleSpc); break;
|
||||
case ARM::VST4d16Pseudo_UPD:
|
||||
ExpandVST4(MBBI, ARM::VST4d16_UPD, true, SingleSpc); break;
|
||||
case ARM::VST4d32Pseudo_UPD:
|
||||
ExpandVST4(MBBI, ARM::VST4d32_UPD, true, SingleSpc); break;
|
||||
case ARM::VST4q8Pseudo_UPD:
|
||||
ExpandVST4(MBBI, ARM::VST4q8_UPD, true, EvenDblSpc); break;
|
||||
case ARM::VST4q16Pseudo_UPD:
|
||||
ExpandVST4(MBBI, ARM::VST4q16_UPD, true, EvenDblSpc); break;
|
||||
case ARM::VST4q32Pseudo_UPD:
|
||||
ExpandVST4(MBBI, ARM::VST4q32_UPD, true, EvenDblSpc); break;
|
||||
case ARM::VST4q8oddPseudo_UPD:
|
||||
ExpandVST4(MBBI, ARM::VST4q8_UPD, true, OddDblSpc); break;
|
||||
case ARM::VST4q16oddPseudo_UPD:
|
||||
ExpandVST4(MBBI, ARM::VST4q16_UPD, true, OddDblSpc); break;
|
||||
case ARM::VST4q32oddPseudo_UPD:
|
||||
ExpandVST4(MBBI, ARM::VST4q32_UPD, true, OddDblSpc); break;
|
||||
break;
|
||||
}
|
||||
|
||||
if (ModifiedOp)
|
||||
Modified = true;
|
||||
}
|
||||
}
|
||||
MBBI = NMBBI;
|
||||
}
|
||||
|
||||
|
@ -1260,6 +1260,11 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
|
||||
Ops.push_back(MemAddr);
|
||||
Ops.push_back(Align);
|
||||
|
||||
// FIXME: This is a temporary flag to distinguish VSTs that have been
|
||||
// converted to pseudo instructions.
|
||||
bool usePseudoInstrs = (NumVecs == 4 &&
|
||||
VT.getSimpleVT().SimpleTy != MVT::v1i64);
|
||||
|
||||
if (is64BitVector) {
|
||||
if (NumVecs >= 2) {
|
||||
SDValue RegSeq;
|
||||
@ -1278,6 +1283,9 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
|
||||
: N->getOperand(3+3);
|
||||
RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
|
||||
}
|
||||
if (usePseudoInstrs)
|
||||
Ops.push_back(RegSeq);
|
||||
else {
|
||||
|
||||
// Now extract the D registers back out.
|
||||
Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT,
|
||||
@ -1290,15 +1298,16 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
|
||||
if (NumVecs > 3)
|
||||
Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT,
|
||||
RegSeq));
|
||||
}
|
||||
} else {
|
||||
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
|
||||
Ops.push_back(N->getOperand(Vec+3));
|
||||
Ops.push_back(N->getOperand(3));
|
||||
}
|
||||
Ops.push_back(Pred);
|
||||
Ops.push_back(Reg0); // predicate register
|
||||
Ops.push_back(Chain);
|
||||
unsigned Opc = DOpcodes[OpcodeIndex];
|
||||
return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+5);
|
||||
return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(),
|
||||
usePseudoInstrs ? 6 : NumVecs+5);
|
||||
}
|
||||
|
||||
EVT RegVT = GetNEONSubregVT(VT);
|
||||
@ -1363,6 +1372,9 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
|
||||
// Store the even D registers.
|
||||
assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
|
||||
Ops.push_back(Reg0); // post-access address offset
|
||||
if (usePseudoInstrs)
|
||||
Ops.push_back(RegSeq);
|
||||
else
|
||||
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
|
||||
Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec*2, dl,
|
||||
RegVT, RegSeq));
|
||||
@ -1371,18 +1383,24 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
|
||||
Ops.push_back(Chain);
|
||||
unsigned Opc = QOpcodes0[OpcodeIndex];
|
||||
SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
|
||||
MVT::Other, Ops.data(), NumVecs+6);
|
||||
MVT::Other, Ops.data(),
|
||||
usePseudoInstrs ? 7 : NumVecs+6);
|
||||
Chain = SDValue(VStA, 1);
|
||||
|
||||
// Store the odd D registers.
|
||||
Ops[0] = SDValue(VStA, 0); // MemAddr
|
||||
if (usePseudoInstrs)
|
||||
Ops[6] = Chain;
|
||||
else {
|
||||
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
|
||||
Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::dsub_1+Vec*2, dl,
|
||||
RegVT, RegSeq);
|
||||
Ops[NumVecs+5] = Chain;
|
||||
}
|
||||
Opc = QOpcodes1[OpcodeIndex];
|
||||
SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
|
||||
MVT::Other, Ops.data(), NumVecs+6);
|
||||
MVT::Other, Ops.data(),
|
||||
usePseudoInstrs ? 7 : NumVecs+6);
|
||||
Chain = SDValue(VStB, 1);
|
||||
ReplaceUses(SDValue(N, 0), Chain);
|
||||
return NULL;
|
||||
@ -2312,14 +2330,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
|
||||
}
|
||||
|
||||
case Intrinsic::arm_neon_vst4: {
|
||||
unsigned DOpcodes[] = { ARM::VST4d8, ARM::VST4d16,
|
||||
ARM::VST4d32, ARM::VST1d64Q };
|
||||
unsigned QOpcodes0[] = { ARM::VST4q8_UPD,
|
||||
ARM::VST4q16_UPD,
|
||||
ARM::VST4q32_UPD };
|
||||
unsigned QOpcodes1[] = { ARM::VST4q8odd_UPD,
|
||||
ARM::VST4q16odd_UPD,
|
||||
ARM::VST4q32odd_UPD };
|
||||
unsigned DOpcodes[] = { ARM::VST4d8Pseudo, ARM::VST4d16Pseudo,
|
||||
ARM::VST4d32Pseudo, ARM::VST1d64Q };
|
||||
unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
|
||||
ARM::VST4q16Pseudo_UPD,
|
||||
ARM::VST4q32Pseudo_UPD };
|
||||
unsigned QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
|
||||
ARM::VST4q16oddPseudo_UPD,
|
||||
ARM::VST4q32oddPseudo_UPD };
|
||||
return SelectVST(N, 4, DOpcodes, QOpcodes0, QOpcodes1);
|
||||
}
|
||||
|
||||
|
@ -1534,6 +1534,14 @@ class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4,
|
||||
let Inst{7-4} = op7_4;
|
||||
}
|
||||
|
||||
// PseudoNLdSt - Base class for NEON load/store pseudo instructions.
// It uses the Pseudo format with AddrMode6 in the NEON domain and takes no
// opcode/asm strings or selection pattern, only the operand lists, the
// itinerary and a constraint string.
class PseudoNLdSt<dag oops, dag iops, InstrItinClass itin, string cstr>
|
||||
: InstARM<AddrMode6, Size4Bytes, IndexModeNone, Pseudo, NeonDomain, cstr,
|
||||
itin> {
|
||||
let OutOperandList = oops;
|
||||
// A predicate operand is appended after the caller-supplied inputs.
let InOperandList = !con(iops, (ins pred:$p));
|
||||
list<Predicate> Predicates = [HasNEON];
|
||||
}
|
||||
|
||||
class NDataI<dag oops, dag iops, Format f, InstrItinClass itin,
|
||||
string opc, string dt, string asm, string cstr, list<dag> pattern>
|
||||
: NeonI<oops, iops, AddrModeNone, IndexModeNone, f, itin, opc, dt, asm, cstr,
|
||||
|
@ -486,6 +486,19 @@ def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32">;
|
||||
|
||||
let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
|
||||
|
||||
// Classes for VST* pseudo-instructions with multi-register operands.
|
||||
// These are expanded to real instructions after register allocation.
|
||||
// VSTQQPseudo: no results; inputs are the address and one QQPR source.
class VSTQQPseudo
|
||||
: PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), IIC_VST, "">;
|
||||
// VSTQQWBPseudo: writeback form with a GPR result $wb constrained to
// $addr.addr, plus an am6offset input and one QQPR source.
class VSTQQWBPseudo
|
||||
: PseudoNLdSt<(outs GPR:$wb),
|
||||
(ins addrmode6:$addr, am6offset:$offset, QQPR:$src), IIC_VST,
|
||||
"$addr.addr = $wb">;
|
||||
// VSTQQQQWBPseudo: same writeback form, but the source is one QQQQPR register.
class VSTQQQQWBPseudo
|
||||
: PseudoNLdSt<(outs GPR:$wb),
|
||||
(ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), IIC_VST,
|
||||
"$addr.addr = $wb">;
|
||||
|
||||
// VST1 : Vector Store (multiple single elements)
|
||||
class VST1D<bits<4> op7_4, string Dt>
|
||||
: NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$addr, DPR:$src), IIC_VST,
|
||||
@ -664,6 +677,10 @@ def VST4d8 : VST4D<0b0000, 0b0000, "8">;
|
||||
def VST4d16 : VST4D<0b0000, 0b0100, "16">;
|
||||
def VST4d32 : VST4D<0b0000, 0b1000, "32">;
|
||||
|
||||
def VST4d8Pseudo : VSTQQPseudo;
|
||||
def VST4d16Pseudo : VSTQQPseudo;
|
||||
def VST4d32Pseudo : VSTQQPseudo;
|
||||
|
||||
// ...with address register writeback:
|
||||
class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
|
||||
: NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
|
||||
@ -676,6 +693,10 @@ def VST4d8_UPD : VST4DWB<0b0000, 0b0000, "8">;
|
||||
def VST4d16_UPD : VST4DWB<0b0000, 0b0100, "16">;
|
||||
def VST4d32_UPD : VST4DWB<0b0000, 0b1000, "32">;
|
||||
|
||||
def VST4d8Pseudo_UPD : VSTQQWBPseudo;
|
||||
def VST4d16Pseudo_UPD : VSTQQWBPseudo;
|
||||
def VST4d32Pseudo_UPD : VSTQQWBPseudo;
|
||||
|
||||
// ...with double-spaced registers (non-updating versions for disassembly only):
|
||||
def VST4q8 : VST4D<0b0001, 0b0000, "8">;
|
||||
def VST4q16 : VST4D<0b0001, 0b0100, "16">;
|
||||
@ -684,10 +705,14 @@ def VST4q8_UPD : VST4DWB<0b0001, 0b0000, "8">;
|
||||
def VST4q16_UPD : VST4DWB<0b0001, 0b0100, "16">;
|
||||
def VST4q32_UPD : VST4DWB<0b0001, 0b1000, "32">;
|
||||
|
||||
def VST4q8Pseudo_UPD : VSTQQQQWBPseudo;
|
||||
def VST4q16Pseudo_UPD : VSTQQQQWBPseudo;
|
||||
def VST4q32Pseudo_UPD : VSTQQQQWBPseudo;
|
||||
|
||||
// ...alternate versions to be allocated odd register numbers:
|
||||
def VST4q8odd_UPD : VST4DWB<0b0001, 0b0000, "8">;
|
||||
def VST4q16odd_UPD : VST4DWB<0b0001, 0b0100, "16">;
|
||||
def VST4q32odd_UPD : VST4DWB<0b0001, 0b1000, "32">;
|
||||
def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo;
|
||||
def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo;
|
||||
def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo;
|
||||
|
||||
// VST1LN : Vector Store (single element from one lane)
|
||||
// FIXME: Not yet implemented.
|
||||
|
@ -260,9 +260,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
|
||||
Stride = 2;
|
||||
return true;
|
||||
|
||||
case ARM::VST4d8:
|
||||
case ARM::VST4d16:
|
||||
case ARM::VST4d32:
|
||||
case ARM::VST1d64Q:
|
||||
case ARM::VST4LNd8:
|
||||
case ARM::VST4LNd16:
|
||||
@ -271,24 +268,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
|
||||
NumRegs = 4;
|
||||
return true;
|
||||
|
||||
case ARM::VST4q8_UPD:
|
||||
case ARM::VST4q16_UPD:
|
||||
case ARM::VST4q32_UPD:
|
||||
FirstOpnd = 4;
|
||||
NumRegs = 4;
|
||||
Offset = 0;
|
||||
Stride = 2;
|
||||
return true;
|
||||
|
||||
case ARM::VST4q8odd_UPD:
|
||||
case ARM::VST4q16odd_UPD:
|
||||
case ARM::VST4q32odd_UPD:
|
||||
FirstOpnd = 4;
|
||||
NumRegs = 4;
|
||||
Offset = 1;
|
||||
Stride = 2;
|
||||
return true;
|
||||
|
||||
case ARM::VST4LNq16:
|
||||
case ARM::VST4LNq32:
|
||||
FirstOpnd = 2;
|
||||
|
Loading…
x
Reference in New Issue
Block a user