mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-08-05 13:26:55 +00:00
Add codegen support for using post-increment NEON load/store instructions.
The vld1-lane, vld1-dup and vst1-lane instructions do not yet have post-increment versions, but all the other NEON load/store instructions should be handled now.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@125014 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@@ -196,26 +196,30 @@ private:
|
||||
/// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
|
||||
/// loads of D registers and even subregs and odd subregs of Q registers.
|
||||
/// For NumVecs <= 2, QOpcodes1 is not used.
|
||||
SDNode *SelectVLD(SDNode *N, unsigned NumVecs, unsigned *DOpcodes,
|
||||
SDNode *SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
|
||||
unsigned *DOpcodes,
|
||||
unsigned *QOpcodes0, unsigned *QOpcodes1);
|
||||
|
||||
/// SelectVST - Select NEON store intrinsics. NumVecs should
|
||||
/// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
|
||||
/// stores of D registers and even subregs and odd subregs of Q registers.
|
||||
/// For NumVecs <= 2, QOpcodes1 is not used.
|
||||
SDNode *SelectVST(SDNode *N, unsigned NumVecs, unsigned *DOpcodes,
|
||||
SDNode *SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
|
||||
unsigned *DOpcodes,
|
||||
unsigned *QOpcodes0, unsigned *QOpcodes1);
|
||||
|
||||
/// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
|
||||
/// be 2, 3 or 4. The opcode arrays specify the instructions used for
|
||||
/// load/store of D registers and Q registers.
|
||||
SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad, unsigned NumVecs,
|
||||
SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad,
|
||||
bool isUpdating, unsigned NumVecs,
|
||||
unsigned *DOpcodes, unsigned *QOpcodes);
|
||||
|
||||
/// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
|
||||
/// should be 2, 3 or 4. The opcode array specifies the instructions used
|
||||
/// for loading D registers. (Q registers are not supported.)
|
||||
SDNode *SelectVLDDup(SDNode *N, unsigned NumVecs, unsigned *Opcodes);
|
||||
SDNode *SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
|
||||
unsigned *Opcodes);
|
||||
|
||||
/// SelectVTBL - Select NEON VTBL and VTBX intrinsics. NumVecs should be 2,
|
||||
/// 3 or 4. These are custom-selected so that a REG_SEQUENCE can be
|
||||
@@ -1439,14 +1443,15 @@ SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, unsigned NumVecs,
|
||||
return CurDAG->getTargetConstant(Alignment, MVT::i32);
|
||||
}
|
||||
|
||||
SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
|
||||
SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
|
||||
unsigned *DOpcodes, unsigned *QOpcodes0,
|
||||
unsigned *QOpcodes1) {
|
||||
assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
|
||||
DebugLoc dl = N->getDebugLoc();
|
||||
|
||||
SDValue MemAddr, Align;
|
||||
if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align))
|
||||
unsigned AddrOpIdx = isUpdating ? 1 : 2;
|
||||
if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
|
||||
return NULL;
|
||||
|
||||
SDValue Chain = N->getOperand(0);
|
||||
@@ -1482,46 +1487,39 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
|
||||
ResTyElts *= 2;
|
||||
ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
|
||||
}
|
||||
std::vector<EVT> ResTys;
|
||||
ResTys.push_back(ResTy);
|
||||
if (isUpdating)
|
||||
ResTys.push_back(MVT::i32);
|
||||
ResTys.push_back(MVT::Other);
|
||||
|
||||
SDValue Pred = getAL(CurDAG);
|
||||
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
|
||||
SDValue SuperReg;
|
||||
if (is64BitVector) {
|
||||
const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain };
|
||||
SDNode *VLd = CurDAG->getMachineNode(DOpcodes[OpcodeIndex], dl,
|
||||
ResTy, MVT::Other, Ops, 5);
|
||||
if (NumVecs == 1)
|
||||
return VLd;
|
||||
SDNode *VLd;
|
||||
SmallVector<SDValue, 7> Ops;
|
||||
|
||||
SuperReg = SDValue(VLd, 0);
|
||||
assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
|
||||
for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
|
||||
SDValue D = CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec,
|
||||
dl, VT, SuperReg);
|
||||
ReplaceUses(SDValue(N, Vec), D);
|
||||
// Double registers and VLD1/VLD2 quad registers are directly supported.
|
||||
if (is64BitVector || NumVecs <= 2) {
|
||||
unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
|
||||
QOpcodes0[OpcodeIndex]);
|
||||
Ops.push_back(MemAddr);
|
||||
Ops.push_back(Align);
|
||||
if (isUpdating) {
|
||||
SDValue Inc = N->getOperand(AddrOpIdx + 1);
|
||||
Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
|
||||
}
|
||||
ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (NumVecs <= 2) {
|
||||
// Quad registers are directly supported for VLD1 and VLD2,
|
||||
// loading pairs of D regs.
|
||||
const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain };
|
||||
SDNode *VLd = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
|
||||
ResTy, MVT::Other, Ops, 5);
|
||||
if (NumVecs == 1)
|
||||
return VLd;
|
||||
|
||||
SuperReg = SDValue(VLd, 0);
|
||||
Chain = SDValue(VLd, 1);
|
||||
Ops.push_back(Pred);
|
||||
Ops.push_back(Reg0);
|
||||
Ops.push_back(Chain);
|
||||
VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
|
||||
|
||||
} else {
|
||||
// Otherwise, quad registers are loaded with two separate instructions,
|
||||
// where one loads the even registers and the other loads the odd registers.
|
||||
EVT AddrTy = MemAddr.getValueType();
|
||||
|
||||
// Load the even subregs.
|
||||
// Load the even subregs. This is always an updating load, so that it
|
||||
// provides the address to the second load for the odd subregs.
|
||||
SDValue ImplDef =
|
||||
SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
|
||||
const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
|
||||
@@ -1530,37 +1528,54 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
|
||||
Chain = SDValue(VLdA, 2);
|
||||
|
||||
// Load the odd subregs.
|
||||
const SDValue OpsB[] = { SDValue(VLdA, 1), Align, SDValue(VLdA, 0),
|
||||
Pred, Reg0, Chain };
|
||||
SDNode *VLdB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl,
|
||||
ResTy, MVT::Other, OpsB, 6);
|
||||
SuperReg = SDValue(VLdB, 0);
|
||||
Chain = SDValue(VLdB, 1);
|
||||
Ops.push_back(SDValue(VLdA, 1));
|
||||
Ops.push_back(Align);
|
||||
if (isUpdating) {
|
||||
SDValue Inc = N->getOperand(AddrOpIdx + 1);
|
||||
assert(isa<ConstantSDNode>(Inc.getNode()) &&
|
||||
"only constant post-increment update allowed for VLD3/4");
|
||||
(void)Inc;
|
||||
Ops.push_back(Reg0);
|
||||
}
|
||||
Ops.push_back(SDValue(VLdA, 0));
|
||||
Ops.push_back(Pred);
|
||||
Ops.push_back(Reg0);
|
||||
Ops.push_back(Chain);
|
||||
VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
|
||||
Ops.data(), Ops.size());
|
||||
}
|
||||
|
||||
// Extract out the Q registers.
|
||||
assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
|
||||
for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
|
||||
SDValue Q = CurDAG->getTargetExtractSubreg(ARM::qsub_0+Vec,
|
||||
dl, VT, SuperReg);
|
||||
ReplaceUses(SDValue(N, Vec), Q);
|
||||
}
|
||||
ReplaceUses(SDValue(N, NumVecs), Chain);
|
||||
if (NumVecs == 1)
|
||||
return VLd;
|
||||
|
||||
// Extract out the subregisters.
|
||||
SDValue SuperReg = SDValue(VLd, 0);
|
||||
assert(ARM::dsub_7 == ARM::dsub_0+7 &&
|
||||
ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
|
||||
unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
|
||||
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
|
||||
ReplaceUses(SDValue(N, Vec),
|
||||
CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
|
||||
ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
|
||||
if (isUpdating)
|
||||
ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
|
||||
SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
|
||||
unsigned *DOpcodes, unsigned *QOpcodes0,
|
||||
unsigned *QOpcodes1) {
|
||||
assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
|
||||
DebugLoc dl = N->getDebugLoc();
|
||||
|
||||
SDValue MemAddr, Align;
|
||||
if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align))
|
||||
unsigned AddrOpIdx = isUpdating ? 1 : 2;
|
||||
unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
|
||||
if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
|
||||
return NULL;
|
||||
|
||||
SDValue Chain = N->getOperand(0);
|
||||
EVT VT = N->getOperand(3).getValueType();
|
||||
EVT VT = N->getOperand(Vec0Idx).getValueType();
|
||||
bool is64BitVector = VT.is64BitVector();
|
||||
Align = GetVLDSTAlign(Align, NumVecs, is64BitVector);
|
||||
|
||||
@@ -1583,64 +1598,71 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
|
||||
break;
|
||||
}
|
||||
|
||||
std::vector<EVT> ResTys;
|
||||
if (isUpdating)
|
||||
ResTys.push_back(MVT::i32);
|
||||
ResTys.push_back(MVT::Other);
|
||||
|
||||
SDValue Pred = getAL(CurDAG);
|
||||
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
|
||||
SmallVector<SDValue, 7> Ops;
|
||||
|
||||
if (is64BitVector) {
|
||||
// Double registers and VST1/VST2 quad registers are directly supported.
|
||||
if (is64BitVector || NumVecs <= 2) {
|
||||
SDValue SrcReg;
|
||||
if (NumVecs == 1) {
|
||||
SrcReg = N->getOperand(3);
|
||||
} else {
|
||||
SDValue V0 = N->getOperand(0+3);
|
||||
SDValue V1 = N->getOperand(1+3);
|
||||
|
||||
SrcReg = N->getOperand(Vec0Idx);
|
||||
} else if (is64BitVector) {
|
||||
// Form a REG_SEQUENCE to force register allocation.
|
||||
SDValue V0 = N->getOperand(Vec0Idx + 0);
|
||||
SDValue V1 = N->getOperand(Vec0Idx + 1);
|
||||
if (NumVecs == 2)
|
||||
SrcReg = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
|
||||
else {
|
||||
SDValue V2 = N->getOperand(2+3);
|
||||
SDValue V2 = N->getOperand(Vec0Idx + 2);
|
||||
// If it's a vst3, form a quad D-register and leave the last part as
|
||||
// an undef.
|
||||
SDValue V3 = (NumVecs == 3)
|
||||
? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
|
||||
: N->getOperand(3+3);
|
||||
: N->getOperand(Vec0Idx + 3);
|
||||
SrcReg = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
|
||||
}
|
||||
}
|
||||
const SDValue Ops[] = { MemAddr, Align, SrcReg, Pred, Reg0, Chain };
|
||||
return CurDAG->getMachineNode(DOpcodes[OpcodeIndex], dl,
|
||||
MVT::Other, Ops, 6);
|
||||
}
|
||||
|
||||
if (NumVecs <= 2) {
|
||||
// Quad registers are directly supported for VST1 and VST2.
|
||||
SDValue SrcReg;
|
||||
if (NumVecs == 1) {
|
||||
SrcReg = N->getOperand(3);
|
||||
} else {
|
||||
// Form a QQ register.
|
||||
SDValue Q0 = N->getOperand(3);
|
||||
SDValue Q1 = N->getOperand(4);
|
||||
SDValue Q0 = N->getOperand(Vec0Idx);
|
||||
SDValue Q1 = N->getOperand(Vec0Idx + 1);
|
||||
SrcReg = SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0);
|
||||
}
|
||||
const SDValue Ops[] = { MemAddr, Align, SrcReg, Pred, Reg0, Chain };
|
||||
return CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
|
||||
MVT::Other, Ops, 6);
|
||||
|
||||
unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
|
||||
QOpcodes0[OpcodeIndex]);
|
||||
Ops.push_back(MemAddr);
|
||||
Ops.push_back(Align);
|
||||
if (isUpdating) {
|
||||
SDValue Inc = N->getOperand(AddrOpIdx + 1);
|
||||
Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
|
||||
}
|
||||
Ops.push_back(SrcReg);
|
||||
Ops.push_back(Pred);
|
||||
Ops.push_back(Reg0);
|
||||
Ops.push_back(Chain);
|
||||
return CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
|
||||
}
|
||||
|
||||
// Otherwise, quad registers are stored with two separate instructions,
|
||||
// where one stores the even registers and the other stores the odd registers.
|
||||
|
||||
// Form the QQQQ REG_SEQUENCE.
|
||||
SDValue V0 = N->getOperand(0+3);
|
||||
SDValue V1 = N->getOperand(1+3);
|
||||
SDValue V2 = N->getOperand(2+3);
|
||||
SDValue V0 = N->getOperand(Vec0Idx + 0);
|
||||
SDValue V1 = N->getOperand(Vec0Idx + 1);
|
||||
SDValue V2 = N->getOperand(Vec0Idx + 2);
|
||||
SDValue V3 = (NumVecs == 3)
|
||||
? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
|
||||
: N->getOperand(3+3);
|
||||
: N->getOperand(Vec0Idx + 3);
|
||||
SDValue RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
|
||||
|
||||
// Store the even D registers.
|
||||
// Store the even D registers. This is always an updating store, so that it
|
||||
// provides the address to the second store for the odd subregs.
|
||||
const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
|
||||
SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
|
||||
MemAddr.getValueType(),
|
||||
@@ -1648,28 +1670,40 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
|
||||
Chain = SDValue(VStA, 1);
|
||||
|
||||
// Store the odd D registers.
|
||||
const SDValue OpsB[] = { SDValue(VStA, 0), Align, RegSeq, Pred, Reg0, Chain };
|
||||
SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl,
|
||||
MVT::Other, OpsB, 6);
|
||||
Chain = SDValue(VStB, 0);
|
||||
ReplaceUses(SDValue(N, 0), Chain);
|
||||
return NULL;
|
||||
Ops.push_back(SDValue(VStA, 0));
|
||||
Ops.push_back(Align);
|
||||
if (isUpdating) {
|
||||
SDValue Inc = N->getOperand(AddrOpIdx + 1);
|
||||
assert(isa<ConstantSDNode>(Inc.getNode()) &&
|
||||
"only constant post-increment update allowed for VST3/4");
|
||||
(void)Inc;
|
||||
Ops.push_back(Reg0);
|
||||
}
|
||||
Ops.push_back(RegSeq);
|
||||
Ops.push_back(Pred);
|
||||
Ops.push_back(Reg0);
|
||||
Ops.push_back(Chain);
|
||||
return CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
|
||||
Ops.data(), Ops.size());
|
||||
}
|
||||
|
||||
SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
|
||||
unsigned NumVecs, unsigned *DOpcodes,
|
||||
bool isUpdating, unsigned NumVecs,
|
||||
unsigned *DOpcodes,
|
||||
unsigned *QOpcodes) {
|
||||
assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
|
||||
DebugLoc dl = N->getDebugLoc();
|
||||
|
||||
SDValue MemAddr, Align;
|
||||
if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align))
|
||||
unsigned AddrOpIdx = isUpdating ? 1 : 2;
|
||||
unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
|
||||
if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
|
||||
return NULL;
|
||||
|
||||
SDValue Chain = N->getOperand(0);
|
||||
unsigned Lane =
|
||||
cast<ConstantSDNode>(N->getOperand(NumVecs+3))->getZExtValue();
|
||||
EVT VT = IsLoad ? N->getValueType(0) : N->getOperand(3).getValueType();
|
||||
cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
|
||||
EVT VT = N->getOperand(Vec0Idx).getValueType();
|
||||
bool is64BitVector = VT.is64BitVector();
|
||||
|
||||
unsigned Alignment = 0;
|
||||
@@ -1701,29 +1735,42 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
|
||||
case MVT::v4i32: OpcodeIndex = 1; break;
|
||||
}
|
||||
|
||||
std::vector<EVT> ResTys;
|
||||
if (IsLoad) {
|
||||
unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
|
||||
if (!is64BitVector)
|
||||
ResTyElts *= 2;
|
||||
ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
|
||||
MVT::i64, ResTyElts));
|
||||
}
|
||||
if (isUpdating)
|
||||
ResTys.push_back(MVT::i32);
|
||||
ResTys.push_back(MVT::Other);
|
||||
|
||||
SDValue Pred = getAL(CurDAG);
|
||||
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
|
||||
|
||||
SmallVector<SDValue, 7> Ops;
|
||||
SmallVector<SDValue, 8> Ops;
|
||||
Ops.push_back(MemAddr);
|
||||
Ops.push_back(Align);
|
||||
|
||||
unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
|
||||
QOpcodes[OpcodeIndex]);
|
||||
if (isUpdating) {
|
||||
SDValue Inc = N->getOperand(AddrOpIdx + 1);
|
||||
Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
|
||||
}
|
||||
|
||||
SDValue SuperReg;
|
||||
SDValue V0 = N->getOperand(0+3);
|
||||
SDValue V1 = N->getOperand(1+3);
|
||||
SDValue V0 = N->getOperand(Vec0Idx + 0);
|
||||
SDValue V1 = N->getOperand(Vec0Idx + 1);
|
||||
if (NumVecs == 2) {
|
||||
if (is64BitVector)
|
||||
SuperReg = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
|
||||
else
|
||||
SuperReg = SDValue(PairQRegs(MVT::v4i64, V0, V1), 0);
|
||||
} else {
|
||||
SDValue V2 = N->getOperand(2+3);
|
||||
SDValue V2 = N->getOperand(Vec0Idx + 2);
|
||||
SDValue V3 = (NumVecs == 3)
|
||||
? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
|
||||
: N->getOperand(3+3);
|
||||
? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
|
||||
: N->getOperand(Vec0Idx + 3);
|
||||
if (is64BitVector)
|
||||
SuperReg = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
|
||||
else
|
||||
@@ -1735,33 +1782,29 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
|
||||
Ops.push_back(Reg0);
|
||||
Ops.push_back(Chain);
|
||||
|
||||
unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
|
||||
QOpcodes[OpcodeIndex]);
|
||||
SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys,
|
||||
Ops.data(), Ops.size());
|
||||
if (!IsLoad)
|
||||
return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 7);
|
||||
|
||||
EVT ResTy;
|
||||
unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
|
||||
if (!is64BitVector)
|
||||
ResTyElts *= 2;
|
||||
ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
|
||||
|
||||
SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTy, MVT::Other,
|
||||
Ops.data(), 7);
|
||||
SuperReg = SDValue(VLdLn, 0);
|
||||
Chain = SDValue(VLdLn, 1);
|
||||
return VLdLn;
|
||||
|
||||
// Extract the subregisters.
|
||||
assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
|
||||
assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
|
||||
unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
|
||||
SuperReg = SDValue(VLdLn, 0);
|
||||
assert(ARM::dsub_7 == ARM::dsub_0+7 &&
|
||||
ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
|
||||
unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
|
||||
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
|
||||
ReplaceUses(SDValue(N, Vec),
|
||||
CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
|
||||
ReplaceUses(SDValue(N, NumVecs), Chain);
|
||||
CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
|
||||
ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
|
||||
if (isUpdating)
|
||||
ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, unsigned NumVecs,
|
||||
unsigned *Opcodes) {
|
||||
SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
|
||||
unsigned NumVecs, unsigned *Opcodes) {
|
||||
assert(NumVecs >=2 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
|
||||
DebugLoc dl = N->getDebugLoc();
|
||||
|
||||
@@ -1800,13 +1843,26 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, unsigned NumVecs,
|
||||
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
|
||||
SDValue SuperReg;
|
||||
unsigned Opc = Opcodes[OpcodeIndex];
|
||||
const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain };
|
||||
SmallVector<SDValue, 6> Ops;
|
||||
Ops.push_back(MemAddr);
|
||||
Ops.push_back(Align);
|
||||
if (isUpdating) {
|
||||
SDValue Inc = N->getOperand(2);
|
||||
Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
|
||||
}
|
||||
Ops.push_back(Pred);
|
||||
Ops.push_back(Reg0);
|
||||
Ops.push_back(Chain);
|
||||
|
||||
unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
|
||||
EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
|
||||
SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTy, MVT::Other, Ops, 5);
|
||||
std::vector<EVT> ResTys;
|
||||
ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts));
|
||||
if (isUpdating)
|
||||
ResTys.push_back(MVT::i32);
|
||||
ResTys.push_back(MVT::Other);
|
||||
SDNode *VLdDup =
|
||||
CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
|
||||
SuperReg = SDValue(VLdDup, 0);
|
||||
Chain = SDValue(VLdDup, 1);
|
||||
|
||||
// Extract the subregisters.
|
||||
assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
|
||||
@@ -1814,7 +1870,9 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, unsigned NumVecs,
|
||||
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
|
||||
ReplaceUses(SDValue(N, Vec),
|
||||
CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
|
||||
ReplaceUses(SDValue(N, NumVecs), Chain);
|
||||
ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
|
||||
if (isUpdating)
|
||||
ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -2470,19 +2528,165 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
|
||||
case ARMISD::VLD2DUP: {
|
||||
unsigned Opcodes[] = { ARM::VLD2DUPd8Pseudo, ARM::VLD2DUPd16Pseudo,
|
||||
ARM::VLD2DUPd32Pseudo };
|
||||
return SelectVLDDup(N, 2, Opcodes);
|
||||
return SelectVLDDup(N, false, 2, Opcodes);
|
||||
}
|
||||
|
||||
case ARMISD::VLD3DUP: {
|
||||
unsigned Opcodes[] = { ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd16Pseudo,
|
||||
ARM::VLD3DUPd32Pseudo };
|
||||
return SelectVLDDup(N, 3, Opcodes);
|
||||
return SelectVLDDup(N, false, 3, Opcodes);
|
||||
}
|
||||
|
||||
case ARMISD::VLD4DUP: {
|
||||
unsigned Opcodes[] = { ARM::VLD4DUPd8Pseudo, ARM::VLD4DUPd16Pseudo,
|
||||
ARM::VLD4DUPd32Pseudo };
|
||||
return SelectVLDDup(N, 4, Opcodes);
|
||||
return SelectVLDDup(N, false, 4, Opcodes);
|
||||
}
|
||||
|
||||
case ARMISD::VLD2DUP_UPD: {
|
||||
unsigned Opcodes[] = { ARM::VLD2DUPd8Pseudo_UPD, ARM::VLD2DUPd16Pseudo_UPD,
|
||||
ARM::VLD2DUPd32Pseudo_UPD };
|
||||
return SelectVLDDup(N, true, 2, Opcodes);
|
||||
}
|
||||
|
||||
case ARMISD::VLD3DUP_UPD: {
|
||||
unsigned Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD, ARM::VLD3DUPd16Pseudo_UPD,
|
||||
ARM::VLD3DUPd32Pseudo_UPD };
|
||||
return SelectVLDDup(N, true, 3, Opcodes);
|
||||
}
|
||||
|
||||
case ARMISD::VLD4DUP_UPD: {
|
||||
unsigned Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD, ARM::VLD4DUPd16Pseudo_UPD,
|
||||
ARM::VLD4DUPd32Pseudo_UPD };
|
||||
return SelectVLDDup(N, true, 4, Opcodes);
|
||||
}
|
||||
|
||||
case ARMISD::VLD1_UPD: {
|
||||
unsigned DOpcodes[] = { ARM::VLD1d8_UPD, ARM::VLD1d16_UPD,
|
||||
ARM::VLD1d32_UPD, ARM::VLD1d64_UPD };
|
||||
unsigned QOpcodes[] = { ARM::VLD1q8Pseudo_UPD, ARM::VLD1q16Pseudo_UPD,
|
||||
ARM::VLD1q32Pseudo_UPD, ARM::VLD1q64Pseudo_UPD };
|
||||
return SelectVLD(N, true, 1, DOpcodes, QOpcodes, 0);
|
||||
}
|
||||
|
||||
case ARMISD::VLD2_UPD: {
|
||||
unsigned DOpcodes[] = { ARM::VLD2d8Pseudo_UPD, ARM::VLD2d16Pseudo_UPD,
|
||||
ARM::VLD2d32Pseudo_UPD, ARM::VLD1q64Pseudo_UPD };
|
||||
unsigned QOpcodes[] = { ARM::VLD2q8Pseudo_UPD, ARM::VLD2q16Pseudo_UPD,
|
||||
ARM::VLD2q32Pseudo_UPD };
|
||||
return SelectVLD(N, true, 2, DOpcodes, QOpcodes, 0);
|
||||
}
|
||||
|
||||
case ARMISD::VLD3_UPD: {
|
||||
unsigned DOpcodes[] = { ARM::VLD3d8Pseudo_UPD, ARM::VLD3d16Pseudo_UPD,
|
||||
ARM::VLD3d32Pseudo_UPD, ARM::VLD1d64TPseudo_UPD };
|
||||
unsigned QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
|
||||
ARM::VLD3q16Pseudo_UPD,
|
||||
ARM::VLD3q32Pseudo_UPD };
|
||||
unsigned QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
|
||||
ARM::VLD3q16oddPseudo_UPD,
|
||||
ARM::VLD3q32oddPseudo_UPD };
|
||||
return SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
|
||||
}
|
||||
|
||||
case ARMISD::VLD4_UPD: {
|
||||
unsigned DOpcodes[] = { ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD,
|
||||
ARM::VLD4d32Pseudo_UPD, ARM::VLD1d64QPseudo_UPD };
|
||||
unsigned QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
|
||||
ARM::VLD4q16Pseudo_UPD,
|
||||
ARM::VLD4q32Pseudo_UPD };
|
||||
unsigned QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
|
||||
ARM::VLD4q16oddPseudo_UPD,
|
||||
ARM::VLD4q32oddPseudo_UPD };
|
||||
return SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
|
||||
}
|
||||
|
||||
case ARMISD::VLD2LN_UPD: {
|
||||
unsigned DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD, ARM::VLD2LNd16Pseudo_UPD,
|
||||
ARM::VLD2LNd32Pseudo_UPD };
|
||||
unsigned QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
|
||||
ARM::VLD2LNq32Pseudo_UPD };
|
||||
return SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
|
||||
}
|
||||
|
||||
case ARMISD::VLD3LN_UPD: {
|
||||
unsigned DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD, ARM::VLD3LNd16Pseudo_UPD,
|
||||
ARM::VLD3LNd32Pseudo_UPD };
|
||||
unsigned QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
|
||||
ARM::VLD3LNq32Pseudo_UPD };
|
||||
return SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
|
||||
}
|
||||
|
||||
case ARMISD::VLD4LN_UPD: {
|
||||
unsigned DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD, ARM::VLD4LNd16Pseudo_UPD,
|
||||
ARM::VLD4LNd32Pseudo_UPD };
|
||||
unsigned QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
|
||||
ARM::VLD4LNq32Pseudo_UPD };
|
||||
return SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
|
||||
}
|
||||
|
||||
case ARMISD::VST1_UPD: {
|
||||
unsigned DOpcodes[] = { ARM::VST1d8_UPD, ARM::VST1d16_UPD,
|
||||
ARM::VST1d32_UPD, ARM::VST1d64_UPD };
|
||||
unsigned QOpcodes[] = { ARM::VST1q8Pseudo_UPD, ARM::VST1q16Pseudo_UPD,
|
||||
ARM::VST1q32Pseudo_UPD, ARM::VST1q64Pseudo_UPD };
|
||||
return SelectVST(N, true, 1, DOpcodes, QOpcodes, 0);
|
||||
}
|
||||
|
||||
case ARMISD::VST2_UPD: {
|
||||
unsigned DOpcodes[] = { ARM::VST2d8Pseudo_UPD, ARM::VST2d16Pseudo_UPD,
|
||||
ARM::VST2d32Pseudo_UPD, ARM::VST1q64Pseudo_UPD };
|
||||
unsigned QOpcodes[] = { ARM::VST2q8Pseudo_UPD, ARM::VST2q16Pseudo_UPD,
|
||||
ARM::VST2q32Pseudo_UPD };
|
||||
return SelectVST(N, true, 2, DOpcodes, QOpcodes, 0);
|
||||
}
|
||||
|
||||
case ARMISD::VST3_UPD: {
|
||||
unsigned DOpcodes[] = { ARM::VST3d8Pseudo_UPD, ARM::VST3d16Pseudo_UPD,
|
||||
ARM::VST3d32Pseudo_UPD, ARM::VST1d64TPseudo_UPD };
|
||||
unsigned QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
|
||||
ARM::VST3q16Pseudo_UPD,
|
||||
ARM::VST3q32Pseudo_UPD };
|
||||
unsigned QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
|
||||
ARM::VST3q16oddPseudo_UPD,
|
||||
ARM::VST3q32oddPseudo_UPD };
|
||||
return SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
|
||||
}
|
||||
|
||||
case ARMISD::VST4_UPD: {
|
||||
unsigned DOpcodes[] = { ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD,
|
||||
ARM::VST4d32Pseudo_UPD, ARM::VST1d64QPseudo_UPD };
|
||||
unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
|
||||
ARM::VST4q16Pseudo_UPD,
|
||||
ARM::VST4q32Pseudo_UPD };
|
||||
unsigned QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
|
||||
ARM::VST4q16oddPseudo_UPD,
|
||||
ARM::VST4q32oddPseudo_UPD };
|
||||
return SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
|
||||
}
|
||||
|
||||
case ARMISD::VST2LN_UPD: {
|
||||
unsigned DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD, ARM::VST2LNd16Pseudo_UPD,
|
||||
ARM::VST2LNd32Pseudo_UPD };
|
||||
unsigned QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
|
||||
ARM::VST2LNq32Pseudo_UPD };
|
||||
return SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
|
||||
}
|
||||
|
||||
case ARMISD::VST3LN_UPD: {
|
||||
unsigned DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD, ARM::VST3LNd16Pseudo_UPD,
|
||||
ARM::VST3LNd32Pseudo_UPD };
|
||||
unsigned QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
|
||||
ARM::VST3LNq32Pseudo_UPD };
|
||||
return SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
|
||||
}
|
||||
|
||||
case ARMISD::VST4LN_UPD: {
|
||||
unsigned DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD, ARM::VST4LNd16Pseudo_UPD,
|
||||
ARM::VST4LNd32Pseudo_UPD };
|
||||
unsigned QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
|
||||
ARM::VST4LNq32Pseudo_UPD };
|
||||
return SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
|
||||
}
|
||||
|
||||
case ISD::INTRINSIC_VOID:
|
||||
@@ -2497,7 +2701,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
|
||||
ARM::VLD1d32, ARM::VLD1d64 };
|
||||
unsigned QOpcodes[] = { ARM::VLD1q8Pseudo, ARM::VLD1q16Pseudo,
|
||||
ARM::VLD1q32Pseudo, ARM::VLD1q64Pseudo };
|
||||
return SelectVLD(N, 1, DOpcodes, QOpcodes, 0);
|
||||
return SelectVLD(N, false, 1, DOpcodes, QOpcodes, 0);
|
||||
}
|
||||
|
||||
case Intrinsic::arm_neon_vld2: {
|
||||
@@ -2505,7 +2709,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
|
||||
ARM::VLD2d32Pseudo, ARM::VLD1q64Pseudo };
|
||||
unsigned QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
|
||||
ARM::VLD2q32Pseudo };
|
||||
return SelectVLD(N, 2, DOpcodes, QOpcodes, 0);
|
||||
return SelectVLD(N, false, 2, DOpcodes, QOpcodes, 0);
|
||||
}
|
||||
|
||||
case Intrinsic::arm_neon_vld3: {
|
||||
@@ -2517,7 +2721,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
|
||||
unsigned QOpcodes1[] = { ARM::VLD3q8oddPseudo,
|
||||
ARM::VLD3q16oddPseudo,
|
||||
ARM::VLD3q32oddPseudo };
|
||||
return SelectVLD(N, 3, DOpcodes, QOpcodes0, QOpcodes1);
|
||||
return SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
|
||||
}
|
||||
|
||||
case Intrinsic::arm_neon_vld4: {
|
||||
@@ -2529,28 +2733,28 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
|
||||
unsigned QOpcodes1[] = { ARM::VLD4q8oddPseudo,
|
||||
ARM::VLD4q16oddPseudo,
|
||||
ARM::VLD4q32oddPseudo };
|
||||
return SelectVLD(N, 4, DOpcodes, QOpcodes0, QOpcodes1);
|
||||
return SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
|
||||
}
|
||||
|
||||
case Intrinsic::arm_neon_vld2lane: {
|
||||
unsigned DOpcodes[] = { ARM::VLD2LNd8Pseudo, ARM::VLD2LNd16Pseudo,
|
||||
ARM::VLD2LNd32Pseudo };
|
||||
unsigned QOpcodes[] = { ARM::VLD2LNq16Pseudo, ARM::VLD2LNq32Pseudo };
|
||||
return SelectVLDSTLane(N, true, 2, DOpcodes, QOpcodes);
|
||||
return SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
|
||||
}
|
||||
|
||||
case Intrinsic::arm_neon_vld3lane: {
|
||||
unsigned DOpcodes[] = { ARM::VLD3LNd8Pseudo, ARM::VLD3LNd16Pseudo,
|
||||
ARM::VLD3LNd32Pseudo };
|
||||
unsigned QOpcodes[] = { ARM::VLD3LNq16Pseudo, ARM::VLD3LNq32Pseudo };
|
||||
return SelectVLDSTLane(N, true, 3, DOpcodes, QOpcodes);
|
||||
return SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
|
||||
}
|
||||
|
||||
case Intrinsic::arm_neon_vld4lane: {
|
||||
unsigned DOpcodes[] = { ARM::VLD4LNd8Pseudo, ARM::VLD4LNd16Pseudo,
|
||||
ARM::VLD4LNd32Pseudo };
|
||||
unsigned QOpcodes[] = { ARM::VLD4LNq16Pseudo, ARM::VLD4LNq32Pseudo };
|
||||
return SelectVLDSTLane(N, true, 4, DOpcodes, QOpcodes);
|
||||
return SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
|
||||
}
|
||||
|
||||
case Intrinsic::arm_neon_vst1: {
|
||||
@@ -2558,7 +2762,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
|
||||
ARM::VST1d32, ARM::VST1d64 };
|
||||
unsigned QOpcodes[] = { ARM::VST1q8Pseudo, ARM::VST1q16Pseudo,
|
||||
ARM::VST1q32Pseudo, ARM::VST1q64Pseudo };
|
||||
return SelectVST(N, 1, DOpcodes, QOpcodes, 0);
|
||||
return SelectVST(N, false, 1, DOpcodes, QOpcodes, 0);
|
||||
}
|
||||
|
||||
case Intrinsic::arm_neon_vst2: {
|
||||
@@ -2566,7 +2770,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
|
||||
ARM::VST2d32Pseudo, ARM::VST1q64Pseudo };
|
||||
unsigned QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
|
||||
ARM::VST2q32Pseudo };
|
||||
return SelectVST(N, 2, DOpcodes, QOpcodes, 0);
|
||||
return SelectVST(N, false, 2, DOpcodes, QOpcodes, 0);
|
||||
}
|
||||
|
||||
case Intrinsic::arm_neon_vst3: {
|
||||
@@ -2578,7 +2782,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
|
||||
unsigned QOpcodes1[] = { ARM::VST3q8oddPseudo,
|
||||
ARM::VST3q16oddPseudo,
|
||||
ARM::VST3q32oddPseudo };
|
||||
return SelectVST(N, 3, DOpcodes, QOpcodes0, QOpcodes1);
|
||||
return SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
|
||||
}
|
||||
|
||||
case Intrinsic::arm_neon_vst4: {
|
||||
@@ -2590,28 +2794,28 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
|
||||
unsigned QOpcodes1[] = { ARM::VST4q8oddPseudo,
|
||||
ARM::VST4q16oddPseudo,
|
||||
ARM::VST4q32oddPseudo };
|
||||
return SelectVST(N, 4, DOpcodes, QOpcodes0, QOpcodes1);
|
||||
return SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
|
||||
}
|
||||
|
||||
case Intrinsic::arm_neon_vst2lane: {
|
||||
unsigned DOpcodes[] = { ARM::VST2LNd8Pseudo, ARM::VST2LNd16Pseudo,
|
||||
ARM::VST2LNd32Pseudo };
|
||||
unsigned QOpcodes[] = { ARM::VST2LNq16Pseudo, ARM::VST2LNq32Pseudo };
|
||||
return SelectVLDSTLane(N, false, 2, DOpcodes, QOpcodes);
|
||||
return SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
|
||||
}
|
||||
|
||||
case Intrinsic::arm_neon_vst3lane: {
|
||||
unsigned DOpcodes[] = { ARM::VST3LNd8Pseudo, ARM::VST3LNd16Pseudo,
|
||||
ARM::VST3LNd32Pseudo };
|
||||
unsigned QOpcodes[] = { ARM::VST3LNq16Pseudo, ARM::VST3LNq32Pseudo };
|
||||
return SelectVLDSTLane(N, false, 3, DOpcodes, QOpcodes);
|
||||
return SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
|
||||
}
|
||||
|
||||
case Intrinsic::arm_neon_vst4lane: {
|
||||
unsigned DOpcodes[] = { ARM::VST4LNd8Pseudo, ARM::VST4LNd16Pseudo,
|
||||
ARM::VST4LNd32Pseudo };
|
||||
unsigned QOpcodes[] = { ARM::VST4LNq16Pseudo, ARM::VST4LNq32Pseudo };
|
||||
return SelectVLDSTLane(N, false, 4, DOpcodes, QOpcodes);
|
||||
return SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
Reference in New Issue
Block a user