Implement the AArch64 vector load/store multiple N-element structure instructions (class SIMD lselem).

This includes the following 14 instructions:
4 ld1 instructions: load multiple 1-element structures into 1/2/3/4 sequential registers.
ld2/ld3/ld4: load multiple N-element structures into N sequential registers (N = 2, 3, 4).
4 st1 instructions: store multiple 1-element structures from 1/2/3/4 sequential registers.
st2/st3/st4: store multiple N-element structures from N sequential registers (N = 2, 3, 4).
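For illustration only (not part of this patch): a hedged user-level sketch, assuming an AArch64 target with NEON and clang's arm_neon.h intrinsics; the function name and data layout are made up. vld2q_f32/vst2q_f32 are typically lowered through the llvm.arm.neon.vld2/vst2 intrinsics that this patch now selects into ld2/st2 instructions.

  #include <arm_neon.h>

  /* Swap the two interleaved streams held in buf[0..7]. vld2q_f32 performs an
     ld2-style de-interleaving load into two 4 x f32 vectors; vst2q_f32 stores
     them back, re-interleaving (an st2-style store). */
  void swap_streams(float *buf) {
    float32x4x2_t v = vld2q_f32(buf);
    float32x4_t tmp = v.val[0];
    v.val[0] = v.val[1];
    v.val[1] = tmp;
    vst2q_f32(buf, v);
  }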



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192352 91177308-0d34-0410-b5e6-96231b3b80d8
Hao Liu 2013-10-10 15:01:24 +00:00
parent 8ccf2b3c9e
commit d622bef31d
15 changed files with 2808 additions and 3 deletions

View File

@@ -208,7 +208,7 @@ namespace llvm {
bool is64BitVector() const {
return (SimpleTy == MVT::v8i8 || SimpleTy == MVT::v4i16 ||
SimpleTy == MVT::v2i32 || SimpleTy == MVT::v1i64 ||
- SimpleTy == MVT::v2f32);
+ SimpleTy == MVT::v1f64 || SimpleTy == MVT::v2f32);
}
/// is128BitVector - Return true if this is a 128-bit vector type.

View File

@@ -109,6 +109,23 @@ public:
SDNode* Select(SDNode*);
private:
/// Select NEON load intrinsics. NumVecs should be 1, 2, 3 or 4.
SDNode *SelectVLD(SDNode *N, unsigned NumVecs, const uint16_t *Opcode);
/// Select NEON store intrinsics. NumVecs should be 1, 2, 3 or 4.
SDNode *SelectVST(SDNode *N, unsigned NumVecs, const uint16_t *Opcodes);
// Form pairs of consecutive 64-bit/128-bit registers.
SDNode *createDPairNode(SDValue V0, SDValue V1);
SDNode *createQPairNode(SDValue V0, SDValue V1);
// Form sequences of 3 consecutive 64-bit/128-bit registers.
SDNode *createDTripleNode(SDValue V0, SDValue V1, SDValue V2);
SDNode *createQTripleNode(SDValue V0, SDValue V1, SDValue V2);
// Form sequences of 4 consecutive 64-bit/128-bit registers.
SDNode *createDQuadNode(SDValue V0, SDValue V1, SDValue V2, SDValue V3);
SDNode *createQQuadNode(SDValue V0, SDValue V1, SDValue V2, SDValue V3);
};
}
@@ -390,6 +407,221 @@ SDNode *AArch64DAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8,
&Ops[0], Ops.size());
}
SDNode *AArch64DAGToDAGISel::createDPairNode(SDValue V0, SDValue V1) {
SDLoc dl(V0.getNode());
SDValue RegClass =
CurDAG->getTargetConstant(AArch64::DPairRegClassID, MVT::i32);
SDValue SubReg0 = CurDAG->getTargetConstant(AArch64::dsub_0, MVT::i32);
SDValue SubReg1 = CurDAG->getTargetConstant(AArch64::dsub_1, MVT::i32);
const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::v2i64,
Ops);
}
SDNode *AArch64DAGToDAGISel::createQPairNode(SDValue V0, SDValue V1) {
SDLoc dl(V0.getNode());
SDValue RegClass =
CurDAG->getTargetConstant(AArch64::QPairRegClassID, MVT::i32);
SDValue SubReg0 = CurDAG->getTargetConstant(AArch64::qsub_0, MVT::i32);
SDValue SubReg1 = CurDAG->getTargetConstant(AArch64::qsub_1, MVT::i32);
const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::v4i64,
Ops);
}
SDNode *AArch64DAGToDAGISel::createDTripleNode(SDValue V0, SDValue V1,
SDValue V2) {
SDLoc dl(V0.getNode());
SDValue RegClass =
CurDAG->getTargetConstant(AArch64::DTripleRegClassID, MVT::i32);
SDValue SubReg0 = CurDAG->getTargetConstant(AArch64::dsub_0, MVT::i32);
SDValue SubReg1 = CurDAG->getTargetConstant(AArch64::dsub_1, MVT::i32);
SDValue SubReg2 = CurDAG->getTargetConstant(AArch64::dsub_2, MVT::i32);
const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, V2, SubReg2 };
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped,
Ops);
}
SDNode *AArch64DAGToDAGISel::createQTripleNode(SDValue V0, SDValue V1,
SDValue V2) {
SDLoc dl(V0.getNode());
SDValue RegClass =
CurDAG->getTargetConstant(AArch64::QTripleRegClassID, MVT::i32);
SDValue SubReg0 = CurDAG->getTargetConstant(AArch64::qsub_0, MVT::i32);
SDValue SubReg1 = CurDAG->getTargetConstant(AArch64::qsub_1, MVT::i32);
SDValue SubReg2 = CurDAG->getTargetConstant(AArch64::qsub_2, MVT::i32);
const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, V2, SubReg2 };
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped,
Ops);
}
SDNode *AArch64DAGToDAGISel::createDQuadNode(SDValue V0, SDValue V1, SDValue V2,
SDValue V3) {
SDLoc dl(V0.getNode());
SDValue RegClass =
CurDAG->getTargetConstant(AArch64::DQuadRegClassID, MVT::i32);
SDValue SubReg0 = CurDAG->getTargetConstant(AArch64::dsub_0, MVT::i32);
SDValue SubReg1 = CurDAG->getTargetConstant(AArch64::dsub_1, MVT::i32);
SDValue SubReg2 = CurDAG->getTargetConstant(AArch64::dsub_2, MVT::i32);
SDValue SubReg3 = CurDAG->getTargetConstant(AArch64::dsub_3, MVT::i32);
const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, V2, SubReg2, V3,
SubReg3 };
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::v4i64,
Ops);
}
SDNode *AArch64DAGToDAGISel::createQQuadNode(SDValue V0, SDValue V1, SDValue V2,
SDValue V3) {
SDLoc dl(V0.getNode());
SDValue RegClass =
CurDAG->getTargetConstant(AArch64::QQuadRegClassID, MVT::i32);
SDValue SubReg0 = CurDAG->getTargetConstant(AArch64::qsub_0, MVT::i32);
SDValue SubReg1 = CurDAG->getTargetConstant(AArch64::qsub_1, MVT::i32);
SDValue SubReg2 = CurDAG->getTargetConstant(AArch64::qsub_2, MVT::i32);
SDValue SubReg3 = CurDAG->getTargetConstant(AArch64::qsub_3, MVT::i32);
const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, V2, SubReg2, V3,
SubReg3 };
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::v8i64,
Ops);
}
SDNode *AArch64DAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
const uint16_t *Opcodes) {
assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
EVT VT = N->getValueType(0);
unsigned OpcodeIndex;
switch (VT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("unhandled vector load type");
case MVT::v8i8: OpcodeIndex = 0; break;
case MVT::v4i16: OpcodeIndex = 1; break;
case MVT::v2f32:
case MVT::v2i32: OpcodeIndex = 2; break;
case MVT::v1f64:
case MVT::v1i64: OpcodeIndex = 3; break;
case MVT::v16i8: OpcodeIndex = 4; break;
case MVT::v8f16:
case MVT::v8i16: OpcodeIndex = 5; break;
case MVT::v4f32:
case MVT::v4i32: OpcodeIndex = 6; break;
case MVT::v2f64:
case MVT::v2i64: OpcodeIndex = 7; break;
}
unsigned Opc = Opcodes[OpcodeIndex];
SmallVector<SDValue, 2> Ops;
Ops.push_back(N->getOperand(2)); // Push back the Memory Address
Ops.push_back(N->getOperand(0)); // Push back the Chain
std::vector<EVT> ResTys;
bool is64BitVector = VT.is64BitVector();
if (NumVecs == 1)
ResTys.push_back(VT);
else if (NumVecs == 3)
ResTys.push_back(MVT::Untyped);
else {
EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,
is64BitVector ? NumVecs : NumVecs * 2);
ResTys.push_back(ResTy);
}
ResTys.push_back(MVT::Other); // Type of the Chain
SDLoc dl(N);
SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
// Transfer memoperands.
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);
if (NumVecs == 1)
return VLd;
// If NumVecs > 1, the return result is a super register containing 2-4
// consecutive vector registers.
SDValue SuperReg = SDValue(VLd, 0);
unsigned Sub0 = is64BitVector ? AArch64::dsub_0 : AArch64::qsub_0;
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
ReplaceUses(SDValue(N, Vec),
CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
// Update users of the Chain
ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
return NULL;
}
SDNode *AArch64DAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
const uint16_t *Opcodes) {
assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
SDLoc dl(N);
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
unsigned Vec0Idx = 3;
EVT VT = N->getOperand(Vec0Idx).getValueType();
unsigned OpcodeIndex;
switch (VT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("unhandled vector store type");
case MVT::v8i8: OpcodeIndex = 0; break;
case MVT::v4i16: OpcodeIndex = 1; break;
case MVT::v2f32:
case MVT::v2i32: OpcodeIndex = 2; break;
case MVT::v1f64:
case MVT::v1i64: OpcodeIndex = 3; break;
case MVT::v16i8: OpcodeIndex = 4; break;
case MVT::v8f16:
case MVT::v8i16: OpcodeIndex = 5; break;
case MVT::v4f32:
case MVT::v4i32: OpcodeIndex = 6; break;
case MVT::v2f64:
case MVT::v2i64: OpcodeIndex = 7; break;
}
unsigned Opc = Opcodes[OpcodeIndex];
std::vector<EVT> ResTys;
ResTys.push_back(MVT::Other); // Type for the Chain
SmallVector<SDValue, 6> Ops;
Ops.push_back(N->getOperand(2)); // Push back the Memory Address
bool is64BitVector = VT.is64BitVector();
SDValue V0 = N->getOperand(Vec0Idx + 0);
SDValue SrcReg;
if (NumVecs == 1)
SrcReg = V0;
else {
SDValue V1 = N->getOperand(Vec0Idx + 1);
if (NumVecs == 2)
SrcReg = is64BitVector ? SDValue(createDPairNode(V0, V1), 0)
: SDValue(createQPairNode(V0, V1), 0);
else {
SDValue V2 = N->getOperand(Vec0Idx + 2);
if (NumVecs == 3)
SrcReg = is64BitVector ? SDValue(createDTripleNode(V0, V1, V2), 0)
: SDValue(createQTripleNode(V0, V1, V2), 0);
else {
SDValue V3 = N->getOperand(Vec0Idx + 3);
SrcReg = is64BitVector ? SDValue(createDQuadNode(V0, V1, V2, V3), 0)
: SDValue(createQQuadNode(V0, V1, V2, V3), 0);
}
}
}
Ops.push_back(SrcReg);
// Push back the Chain
Ops.push_back(N->getOperand(0));
// Transfer memoperands.
SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);
return VSt;
}
SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
// Dump information about the Node being selected
DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << "\n");
@@ -536,6 +768,72 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
Node = ResNode;
break;
}
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN: {
unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
switch (IntNo) {
default:
break;
case Intrinsic::arm_neon_vld1: {
static const uint16_t Opcodes[] = { AArch64::LD1_8B, AArch64::LD1_4H,
AArch64::LD1_2S, AArch64::LD1_1D,
AArch64::LD1_16B, AArch64::LD1_8H,
AArch64::LD1_4S, AArch64::LD1_2D };
return SelectVLD(Node, 1, Opcodes);
}
case Intrinsic::arm_neon_vld2: {
static const uint16_t Opcodes[] = { AArch64::LD2_8B, AArch64::LD2_4H,
AArch64::LD2_2S, AArch64::LD1_2V_1D,
AArch64::LD2_16B, AArch64::LD2_8H,
AArch64::LD2_4S, AArch64::LD2_2D };
return SelectVLD(Node, 2, Opcodes);
}
case Intrinsic::arm_neon_vld3: {
static const uint16_t Opcodes[] = { AArch64::LD3_8B, AArch64::LD3_4H,
AArch64::LD3_2S, AArch64::LD1_3V_1D,
AArch64::LD3_16B, AArch64::LD3_8H,
AArch64::LD3_4S, AArch64::LD3_2D };
return SelectVLD(Node, 3, Opcodes);
}
case Intrinsic::arm_neon_vld4: {
static const uint16_t Opcodes[] = { AArch64::LD4_8B, AArch64::LD4_4H,
AArch64::LD4_2S, AArch64::LD1_4V_1D,
AArch64::LD4_16B, AArch64::LD4_8H,
AArch64::LD4_4S, AArch64::LD4_2D };
return SelectVLD(Node, 4, Opcodes);
}
case Intrinsic::arm_neon_vst1: {
static const uint16_t Opcodes[] = { AArch64::ST1_8B, AArch64::ST1_4H,
AArch64::ST1_2S, AArch64::ST1_1D,
AArch64::ST1_16B, AArch64::ST1_8H,
AArch64::ST1_4S, AArch64::ST1_2D };
return SelectVST(Node, 1, Opcodes);
}
case Intrinsic::arm_neon_vst2: {
static const uint16_t Opcodes[] = { AArch64::ST2_8B, AArch64::ST2_4H,
AArch64::ST2_2S, AArch64::ST1_2V_1D,
AArch64::ST2_16B, AArch64::ST2_8H,
AArch64::ST2_4S, AArch64::ST2_2D };
return SelectVST(Node, 2, Opcodes);
}
case Intrinsic::arm_neon_vst3: {
static const uint16_t Opcodes[] = { AArch64::ST3_8B, AArch64::ST3_4H,
AArch64::ST3_2S, AArch64::ST1_3V_1D,
AArch64::ST3_16B, AArch64::ST3_8H,
AArch64::ST3_4S, AArch64::ST3_2D };
return SelectVST(Node, 3, Opcodes);
}
case Intrinsic::arm_neon_vst4: {
static const uint16_t Opcodes[] = { AArch64::ST4_8B, AArch64::ST4_4H,
AArch64::ST4_2S, AArch64::ST1_4V_1D,
AArch64::ST4_16B, AArch64::ST4_8H,
AArch64::ST4_4S, AArch64::ST4_2D };
return SelectVST(Node, 4, Opcodes);
}
}
break;
}
default:
break; // Let generic code handle it
}

View File

@@ -3681,3 +3681,57 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
// constraint into a member of a register class.
return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}
/// Represent NEON load and store intrinsics as MemIntrinsicNodes.
/// The associated MachineMemOperands record the alignment specified
/// in the intrinsic calls.
bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I,
unsigned Intrinsic) const {
switch (Intrinsic) {
case Intrinsic::arm_neon_vld1:
case Intrinsic::arm_neon_vld2:
case Intrinsic::arm_neon_vld3:
case Intrinsic::arm_neon_vld4: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
// Conservatively set memVT to the entire set of vectors loaded.
uint64_t NumElts = getDataLayout()->getTypeAllocSize(I.getType()) / 8;
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
Info.vol = false; // volatile loads with NEON intrinsics not supported
Info.readMem = true;
Info.writeMem = false;
return true;
}
case Intrinsic::arm_neon_vst1:
case Intrinsic::arm_neon_vst2:
case Intrinsic::arm_neon_vst3:
case Intrinsic::arm_neon_vst4: {
Info.opc = ISD::INTRINSIC_VOID;
// Conservatively set memVT to the entire set of vectors stored.
unsigned NumElts = 0;
for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
Type *ArgTy = I.getArgOperand(ArgI)->getType();
if (!ArgTy->isVectorTy())
break;
NumElts += getDataLayout()->getTypeAllocSize(ArgTy) / 8;
}
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
Info.vol = false; // volatile stores with NEON intrinsics not supported
Info.readMem = false;
Info.writeMem = true;
return true;
}
default:
break;
}
return false;
}
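A brief, hedged stand-alone sketch of the "conservative memVT" arithmetic above (the helper name is hypothetical and the snippet is not part of the patch): the memory type is modelled as a vector of i64 words covering every vector register the intrinsic touches.

  #include <cassert>

  // NumVecs vectors of VecBytes bytes each, expressed as a count of i64 lanes,
  // mirroring getTypeAllocSize(...) / 8 in getTgtMemIntrinsic above.
  static unsigned conservativeNumI64Lanes(unsigned NumVecs, unsigned VecBytes) {
    return NumVecs * VecBytes / 8;
  }

  int main() {
    assert(conservativeNumI64Lanes(3, 16) == 6); // vld3 of three 4 x i32 -> v6i64
    assert(conservativeNumI64Lanes(2, 8) == 2);  // vld2 of two 8 x i8 -> v2i64
    return 0;
  }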

View File

@@ -281,6 +281,10 @@ public:
std::pair<unsigned, const TargetRegisterClass*>
getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const;
virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
unsigned Intrinsic) const LLVM_OVERRIDE;
private:
const InstrItineraryData *Itins;

View File

@@ -1194,5 +1194,23 @@ class NeonI_Scalar2SameMisc<bit u, bits<2> size, bits<5> opcode, dag outs, dag i
// Inherit Rd in 4-0
}
// Format AdvSIMD vector load/store multiple N-element structure
class NeonI_LdStMult<bit q, bit l, bits<4> opcode, bits<2> size,
dag outs, dag ins, string asmstr,
list<dag> patterns, InstrItinClass itin>
: A64InstRtn<outs, ins, asmstr, patterns, itin>
{
let Inst{31} = 0b0;
let Inst{30} = q;
let Inst{29-23} = 0b0011000;
let Inst{22} = l;
let Inst{21-16} = 0b000000;
let Inst{15-12} = opcode;
let Inst{11-10} = size;
// Inherit Rn in 9-5
// Inherit Rt in 4-0
}
}

View File

@@ -2982,6 +2982,132 @@ defm PMULL2vvv : NeonI_3VDL2_2Op_mull_v3<0b0, 0b1110, "pmull2",
// End of implementation for instruction class (3V Diff)
// The following are the vector load/store multiple N-element structure
// instructions (class SIMD lselem).
// ld1: load multiple 1-element structures to 1/2/3/4 registers.
// ld2/ld3/ld4: load multiple N-element structures to N registers (N = 2, 3, 4).
// The structure consists of a sequence of sets of N values.
// The first element of the structure is placed in the first lane
// of the first vector, the second element in the first lane
// of the second vector, and so on.
// E.g. LD1_3V_2S loads the 32-bit elements {A, B, C, D, E, F} sequentially into
// the three 64-bit vectors of the list, giving {BA, DC, FE}.
// E.g. LD3_2S de-interleaves the 32-bit elements {A, B, C, D, E, F} into the
// three 64-bit vectors of the list, giving {DA, EB, FC}.
// Store instructions mirror the loads: they store multiple structures from N
// registers back to memory.
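As a hedged stand-alone illustration of the LD3_2S de-interleave described above (assumptions: an AArch64/NEON target and the arm_neon.h vld3_f32 intrinsic, which generally maps onto this instruction form; the example itself is not part of the patch):

  #include <arm_neon.h>
  #include <cstdio>

  int main() {
    float buf[6] = {1, 2, 3, 4, 5, 6};   // {A, B, C, D, E, F}
    // ld3-style de-interleave into three 2 x f32 vectors.
    float32x2x3_t v = vld3_f32(buf);
    // Expect v.val[0] = {A, D}, v.val[1] = {B, E}, v.val[2] = {C, F},
    // i.e. the {DA, EB, FC} layout from the comment above.
    printf("%g %g | %g %g | %g %g\n",
           vget_lane_f32(v.val[0], 0), vget_lane_f32(v.val[0], 1),
           vget_lane_f32(v.val[1], 0), vget_lane_f32(v.val[1], 1),
           vget_lane_f32(v.val[2], 0), vget_lane_f32(v.val[2], 1));
    return 0;
  }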
class NeonI_LDVList<bit q, bits<4> opcode, bits<2> size,
RegisterOperand VecList, string asmop>
: NeonI_LdStMult<q, 1, opcode, size,
(outs VecList:$Rt), (ins GPR64xsp:$Rn),
asmop # "\t$Rt, [$Rn]",
[],
NoItinerary> {
let mayLoad = 1;
let neverHasSideEffects = 1;
}
multiclass LDVList_BHSD<bits<4> opcode, string List, string asmop> {
def _8B : NeonI_LDVList<0, opcode, 0b00,
!cast<RegisterOperand>(List # "8B_operand"), asmop>;
def _4H : NeonI_LDVList<0, opcode, 0b01,
!cast<RegisterOperand>(List # "4H_operand"), asmop>;
def _2S : NeonI_LDVList<0, opcode, 0b10,
!cast<RegisterOperand>(List # "2S_operand"), asmop>;
def _16B : NeonI_LDVList<1, opcode, 0b00,
!cast<RegisterOperand>(List # "16B_operand"), asmop>;
def _8H : NeonI_LDVList<1, opcode, 0b01,
!cast<RegisterOperand>(List # "8H_operand"), asmop>;
def _4S : NeonI_LDVList<1, opcode, 0b10,
!cast<RegisterOperand>(List # "4S_operand"), asmop>;
def _2D : NeonI_LDVList<1, opcode, 0b11,
!cast<RegisterOperand>(List # "2D_operand"), asmop>;
}
// Load multiple N-element structure to N consecutive registers (N = 1,2,3,4)
defm LD1 : LDVList_BHSD<0b0111, "VOne", "ld1">;
def LD1_1D : NeonI_LDVList<0, 0b0111, 0b11, VOne1D_operand, "ld1">;
defm LD2 : LDVList_BHSD<0b1000, "VPair", "ld2">;
defm LD3 : LDVList_BHSD<0b0100, "VTriple", "ld3">;
defm LD4 : LDVList_BHSD<0b0000, "VQuad", "ld4">;
// Load multiple 1-element structure to N consecutive registers (N = 2,3,4)
defm LD1_2V : LDVList_BHSD<0b1010, "VPair", "ld1">;
def LD1_2V_1D : NeonI_LDVList<0, 0b1010, 0b11, VPair1D_operand, "ld1">;
defm LD1_3V : LDVList_BHSD<0b0110, "VTriple", "ld1">;
def LD1_3V_1D : NeonI_LDVList<0, 0b0110, 0b11, VTriple1D_operand, "ld1">;
defm LD1_4V : LDVList_BHSD<0b0010, "VQuad", "ld1">;
def LD1_4V_1D : NeonI_LDVList<0, 0b0010, 0b11, VQuad1D_operand, "ld1">;
class NeonI_STVList<bit q, bits<4> opcode, bits<2> size,
RegisterOperand VecList, string asmop>
: NeonI_LdStMult<q, 0, opcode, size,
(outs), (ins GPR64xsp:$Rn, VecList:$Rt),
asmop # "\t$Rt, [$Rn]",
[],
NoItinerary> {
let mayStore = 1;
let neverHasSideEffects = 1;
}
multiclass STVList_BHSD<bits<4> opcode, string List, string asmop> {
def _8B : NeonI_STVList<0, opcode, 0b00,
!cast<RegisterOperand>(List # "8B_operand"), asmop>;
def _4H : NeonI_STVList<0, opcode, 0b01,
!cast<RegisterOperand>(List # "4H_operand"), asmop>;
def _2S : NeonI_STVList<0, opcode, 0b10,
!cast<RegisterOperand>(List # "2S_operand"), asmop>;
def _16B : NeonI_STVList<1, opcode, 0b00,
!cast<RegisterOperand>(List # "16B_operand"), asmop>;
def _8H : NeonI_STVList<1, opcode, 0b01,
!cast<RegisterOperand>(List # "8H_operand"), asmop>;
def _4S : NeonI_STVList<1, opcode, 0b10,
!cast<RegisterOperand>(List # "4S_operand"), asmop>;
def _2D : NeonI_STVList<1, opcode, 0b11,
!cast<RegisterOperand>(List # "2D_operand"), asmop>;
}
// Store multiple N-element structures from N registers (N = 1,2,3,4)
defm ST1 : STVList_BHSD<0b0111, "VOne", "st1">;
def ST1_1D : NeonI_STVList<0, 0b0111, 0b11, VOne1D_operand, "st1">;
defm ST2 : STVList_BHSD<0b1000, "VPair", "st2">;
defm ST3 : STVList_BHSD<0b0100, "VTriple", "st3">;
defm ST4 : STVList_BHSD<0b0000, "VQuad", "st4">;
// Store multiple 1-element structures from N consecutive registers (N = 2,3,4)
defm ST1_2V : STVList_BHSD<0b1010, "VPair", "st1">;
def ST1_2V_1D : NeonI_STVList<0, 0b1010, 0b11, VPair1D_operand, "st1">;
defm ST1_3V : STVList_BHSD<0b0110, "VTriple", "st1">;
def ST1_3V_1D : NeonI_STVList<0, 0b0110, 0b11, VTriple1D_operand, "st1">;
defm ST1_4V : STVList_BHSD<0b0010, "VQuad", "st1">;
def ST1_4V_1D : NeonI_STVList<0, 0b0010, 0b11, VQuad1D_operand, "st1">;
// End of vector load/store multiple N-element structure (class SIMD lselem)
// Scalar Arithmetic
class NeonI_Scalar3Same_D_size<bit u, bits<5> opcode, string asmop>

View File

@@ -17,6 +17,20 @@ def sub_64 : SubRegIndex<64>;
def sub_32 : SubRegIndex<32>;
def sub_16 : SubRegIndex<16>;
def sub_8 : SubRegIndex<8>;
// Note: Code depends on these having consecutive numbers.
def qqsub : SubRegIndex<256, 256>;
def qsub_0 : SubRegIndex<128>;
def qsub_1 : SubRegIndex<128, 128>;
def qsub_2 : ComposedSubRegIndex<qqsub, qsub_0>;
def qsub_3 : ComposedSubRegIndex<qqsub, qsub_1>;
def dsub_0 : SubRegIndex<64>;
def dsub_1 : SubRegIndex<64, 64>;
def dsub_2 : ComposedSubRegIndex<qsub_1, dsub_0>;
def dsub_3 : ComposedSubRegIndex<qsub_1, dsub_1>;
def dsub_4 : ComposedSubRegIndex<qsub_2, dsub_0>;
}
// Registers are identified with 5-bit ID numbers.
@@ -188,3 +202,90 @@ def FlagClass : RegisterClass<"AArch64", [i32], 32, (add NZCV)> {
let CopyCost = -1;
let isAllocatable = 0;
}
//===----------------------------------------------------------------------===//
// Consecutive vector registers
//===----------------------------------------------------------------------===//
// 2 Consecutive 64-bit registers: D0_D1, D1_D2, ..., D30_D31
def Tuples2D : RegisterTuples<[dsub_0, dsub_1],
[(rotl FPR64, 0), (rotl FPR64, 1)]>;
// 3 Consecutive 64-bit registers: D0_D1_D2, ..., D31_D0_D1
def Tuples3D : RegisterTuples<[dsub_0, dsub_1, dsub_2],
[(rotl FPR64, 0), (rotl FPR64, 1),
(rotl FPR64, 2)]>;
// 4 Consecutive 64-bit registers: D0_D1_D2_D3, ..., D31_D0_D1_D2
def Tuples4D : RegisterTuples<[dsub_0, dsub_1, dsub_2, dsub_3],
[(rotl FPR64, 0), (rotl FPR64, 1),
(rotl FPR64, 2), (rotl FPR64, 3)]>;
// 2 Consecutive 128-bit registers: Q0_Q1, Q1_Q2, ..., Q30_Q31
def Tuples2Q : RegisterTuples<[qsub_0, qsub_1],
[(rotl FPR128, 0), (rotl FPR128, 1)]>;
// 3 Consecutive 128-bit registers: Q0_Q1_Q2, ..., Q31_Q0_Q1
def Tuples3Q : RegisterTuples<[qsub_0, qsub_1, qsub_2],
[(rotl FPR128, 0), (rotl FPR128, 1),
(rotl FPR128, 2)]>;
// 4 Consecutive 128-bit registers: Q0_Q1_Q2_Q3, ..., Q31_Q0_Q1_Q2
def Tuples4Q : RegisterTuples<[qsub_0, qsub_1, qsub_2, qsub_3],
[(rotl FPR128, 0), (rotl FPR128, 1),
(rotl FPR128, 2), (rotl FPR128, 3)]>;
// The following are super-register classes to model 2/3/4 consecutive
// 64-bit/128-bit registers.
def DPair : RegisterClass<"AArch64", [v2i64], 64, (add Tuples2D)>;
def DTriple : RegisterClass<"AArch64", [untyped], 64, (add Tuples3D)> {
let Size = 192; // 3 x 64 bits, we have no predefined type of that size.
}
def DQuad : RegisterClass<"AArch64", [v4i64], 64, (add Tuples4D)>;
def QPair : RegisterClass<"AArch64", [v4i64], 128, (add Tuples2Q)>;
def QTriple : RegisterClass<"AArch64", [untyped], 128, (add Tuples3Q)> {
let Size = 384; // 3 x 128 bits, we have no predefined type of that size.
}
def QQuad : RegisterClass<"AArch64", [v8i64], 128, (add Tuples4Q)>;
// The following are the vector list operands.
multiclass VectorList_operands<string PREFIX, string LAYOUT, int Count,
RegisterClass RegList> {
def _asmoperand : AsmOperandClass {
let Name = PREFIX # LAYOUT # Count;
let RenderMethod = "addVectorListOperands";
let PredicateMethod =
"isVectorList<A64Layout::_" # LAYOUT # ", " # Count # ">";
let ParserMethod = "ParseVectorList";
}
def _operand : RegisterOperand<RegList,
"printVectorList<A64Layout::_" # LAYOUT # ", " # Count # ">"> {
let ParserMatchClass =
!cast<AsmOperandClass>(PREFIX # LAYOUT # "_asmoperand");
}
}
multiclass VectorList_BHSD<string PREFIX, int Count, RegisterClass DRegList,
RegisterClass QRegList> {
defm 8B : VectorList_operands<PREFIX, "8B", Count, DRegList>;
defm 4H : VectorList_operands<PREFIX, "4H", Count, DRegList>;
defm 2S : VectorList_operands<PREFIX, "2S", Count, DRegList>;
defm 1D : VectorList_operands<PREFIX, "1D", Count, DRegList>;
defm 16B : VectorList_operands<PREFIX, "16B", Count, QRegList>;
defm 8H : VectorList_operands<PREFIX, "8H", Count, QRegList>;
defm 4S : VectorList_operands<PREFIX, "4S", Count, QRegList>;
defm 2D : VectorList_operands<PREFIX, "2D", Count, QRegList>;
}
// Vector list operand with 1/2/3/4 registers: VOne8B_operand,..., VQuad2D_operand
defm VOne : VectorList_BHSD<"VOne", 1, FPR64, FPR128>;
defm VPair : VectorList_BHSD<"VPair", 2, DPair, QPair>;
defm VTriple : VectorList_BHSD<"VTriple", 3, DTriple, QTriple>;
defm VQuad : VectorList_BHSD<"VQuad", 4, DQuad, QQuad>;

View File

@@ -127,6 +127,11 @@ public:
OperandMatchResultTy
ParseSysRegOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
bool TryParseVector(uint32_t &RegNum, SMLoc &RegEndLoc, StringRef &Layout,
SMLoc &LayoutLoc);
OperandMatchResultTy ParseVectorList(SmallVectorImpl<MCParsedAsmOperand *> &);
bool validateInstruction(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands);
@@ -154,6 +159,7 @@
k_Immediate, // Including expressions referencing symbols
k_Register,
k_ShiftExtend,
k_VectorList, // A sequential list of 1 to 4 registers.
k_SysReg, // The register operand of MRS and MSR instructions
k_Token, // The mnemonic; other raw tokens the auto-generated
k_WrappedRegister // Load/store exclusive permit a wrapped register.
@@ -189,6 +195,13 @@
bool ImplicitAmount;
};
// A vector register list is a sequential list of 1 to 4 registers.
struct VectorListOp {
unsigned RegNum;
unsigned Count;
A64Layout::VectorLayout Layout;
};
struct SysRegOp {
const char *Data;
unsigned Length;
@@ -206,6 +219,7 @@
struct ImmOp Imm;
struct RegOp Reg;
struct ShiftExtendOp ShiftExtend;
struct VectorListOp VectorList;
struct SysRegOp SysReg;
struct TokOp Tok;
};
@@ -717,6 +731,12 @@
return ShiftExtend.Amount == 8 || ShiftExtend.Amount == 16;
}
template <A64Layout::VectorLayout Layout, unsigned Count>
bool isVectorList() const {
return Kind == k_VectorList && VectorList.Layout == Layout &&
VectorList.Count == Count;
}
template <int MemSize> bool isSImm7Scaled() const {
if (!isImm())
return false;
@@ -837,6 +857,18 @@
return Op;
}
static AArch64Operand *CreateVectorList(unsigned RegNum, unsigned Count,
A64Layout::VectorLayout Layout,
SMLoc S, SMLoc E) {
AArch64Operand *Op = new AArch64Operand(k_VectorList, S, E);
Op->VectorList.RegNum = RegNum;
Op->VectorList.Count = Count;
Op->VectorList.Layout = Layout;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
}
static AArch64Operand *CreateToken(StringRef Str, SMLoc S) {
AArch64Operand *Op = new AArch64Operand(k_Token, S, S);
Op->Tok.Data = Str.data();
@@ -1184,6 +1216,11 @@
}
Inst.addOperand(MCOperand::CreateImm(Imm));
}
void addVectorListOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum));
}
};
} // end anonymous namespace.
@@ -1223,7 +1260,6 @@ AArch64AsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
else
return MatchOperand_Success;
}
// ... or it might be a symbolish thing // ... or it might be a symbolish thing
} }
// Fall through // Fall through
@@ -1267,7 +1303,7 @@ AArch64AsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
return ParseOperand(Operands, Mnemonic);
}
// The following will likely be useful later, but not in very early cases
- case AsmToken::LCurly: // Weird SIMD lists
+ case AsmToken::LCurly: // SIMD vector list is not parsed here
llvm_unreachable("Don't know how to deal with '{' in operand");
return MatchOperand_ParseFail;
}
@@ -1890,6 +1926,132 @@ AArch64AsmParser::ParseShiftExtend(
return MatchOperand_Success;
}
/// Try to parse a vector register token. If it is a vector register, the
/// token is eaten and true is returned; otherwise false is returned.
bool AArch64AsmParser::TryParseVector(uint32_t &RegNum, SMLoc &RegEndLoc,
StringRef &Layout, SMLoc &LayoutLoc) {
bool IsVector = true;
if (!IdentifyRegister(RegNum, RegEndLoc, Layout, LayoutLoc))
IsVector = false;
if (!AArch64MCRegisterClasses[AArch64::FPR64RegClassID].contains(RegNum) &&
!AArch64MCRegisterClasses[AArch64::FPR128RegClassID].contains(RegNum))
IsVector = false;
if (Layout.size() == 0)
IsVector = false;
if (!IsVector)
Error(Parser.getTok().getLoc(), "expected vector type register");
Parser.Lex(); // Eat this token.
return IsVector;
}
// A vector list contains 1-4 consecutive registers.
// When the number of vectors is greater than 1, two syntaxes are accepted:
// (1) {Vn.layout, Vn+1.layout, ... , Vm.layout}
// (2) {Vn.layout - Vm.layout}
AArch64AsmParser::OperandMatchResultTy AArch64AsmParser::ParseVectorList(
SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
if (Parser.getTok().isNot(AsmToken::LCurly)) {
Error(Parser.getTok().getLoc(), "'{' expected");
return MatchOperand_ParseFail;
}
SMLoc SLoc = Parser.getTok().getLoc();
Parser.Lex(); // Eat '{' token.
unsigned Reg, Count = 1;
StringRef LayoutStr;
SMLoc RegEndLoc, LayoutLoc;
if (!TryParseVector(Reg, RegEndLoc, LayoutStr, LayoutLoc))
return MatchOperand_ParseFail;
if (Parser.getTok().is(AsmToken::Minus)) {
Parser.Lex(); // Eat the minus.
unsigned Reg2;
StringRef LayoutStr2;
SMLoc RegEndLoc2, LayoutLoc2;
SMLoc RegLoc2 = Parser.getTok().getLoc();
if (!TryParseVector(Reg2, RegEndLoc2, LayoutStr2, LayoutLoc2))
return MatchOperand_ParseFail;
unsigned Space = (Reg < Reg2) ? (Reg2 - Reg) : (Reg2 + 32 - Reg);
if (LayoutStr != LayoutStr2) {
Error(LayoutLoc2, "expected the same vector layout");
return MatchOperand_ParseFail;
}
if (Space == 0 || Space > 3) {
Error(RegLoc2, "invalid number of vectors");
return MatchOperand_ParseFail;
}
Count += Space;
} else {
unsigned LastReg = Reg;
while (Parser.getTok().is(AsmToken::Comma)) {
Parser.Lex(); // Eat the comma.
unsigned Reg2;
StringRef LayoutStr2;
SMLoc RegEndLoc2, LayoutLoc2;
SMLoc RegLoc2 = Parser.getTok().getLoc();
if (!TryParseVector(Reg2, RegEndLoc2, LayoutStr2, LayoutLoc2))
return MatchOperand_ParseFail;
unsigned Space = (LastReg < Reg2) ? (Reg2 - LastReg)
: (Reg2 + 32 - LastReg);
Count++;
// The space between two consecutive vectors should be 1, and they should
// have the same layout. The total count should not be greater than 4.
if (Space != 1) {
Error(RegLoc2, "invalid space between two vectors");
return MatchOperand_ParseFail;
}
if (LayoutStr != LayoutStr2) {
Error(LayoutLoc2, "expected the same vector layout");
return MatchOperand_ParseFail;
}
if (Count > 4) {
Error(RegLoc2, "invalid number of vectors");
return MatchOperand_ParseFail;
}
LastReg = Reg2;
}
}
if (Parser.getTok().isNot(AsmToken::RCurly)) {
Error(Parser.getTok().getLoc(), "'}' expected");
return MatchOperand_ParseFail;
}
SMLoc ELoc = Parser.getTok().getLoc();
Parser.Lex(); // Eat '}' token.
A64Layout::VectorLayout Layout = A64StringToVectorLayout(LayoutStr);
if (Count > 1) { // If count > 1, create vector list using super register.
bool IsVec64 = (Layout < A64Layout::_16B) ? true : false;
static unsigned SupRegIDs[3][2] = {
{ AArch64::QPairRegClassID, AArch64::DPairRegClassID },
{ AArch64::QTripleRegClassID, AArch64::DTripleRegClassID },
{ AArch64::QQuadRegClassID, AArch64::DQuadRegClassID }
};
unsigned SupRegID = SupRegIDs[Count - 2][static_cast<int>(IsVec64)];
unsigned Sub0 = IsVec64 ? AArch64::dsub_0 : AArch64::qsub_0;
const MCRegisterInfo *MRI = getContext().getRegisterInfo();
Reg = MRI->getMatchingSuperReg(Reg, Sub0,
&AArch64MCRegisterClasses[SupRegID]);
}
Operands.push_back(
AArch64Operand::CreateVectorList(Reg, Count, Layout, SLoc, ELoc));
return MatchOperand_Success;
}
// FIXME: We would really like to be able to tablegen'erate this.
bool AArch64AsmParser::
validateInstruction(MCInst &Inst,

View File

@@ -361,6 +361,59 @@ DecodeFPR128LoRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
return DecodeFPR128RegisterClass(Inst, RegNo, Address, Decoder);
}
static DecodeStatus DecodeRegisterClassByID(llvm::MCInst &Inst, unsigned RegNo,
unsigned RegID,
const void *Decoder) {
if (RegNo > 31)
return MCDisassembler::Fail;
uint16_t Register = getReg(Decoder, RegID, RegNo);
Inst.addOperand(MCOperand::CreateReg(Register));
return MCDisassembler::Success;
}
static DecodeStatus DecodeDPairRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder) {
return DecodeRegisterClassByID(Inst, RegNo, AArch64::DPairRegClassID,
Decoder);
}
static DecodeStatus DecodeQPairRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder) {
return DecodeRegisterClassByID(Inst, RegNo, AArch64::QPairRegClassID,
Decoder);
}
static DecodeStatus DecodeDTripleRegisterClass(llvm::MCInst &Inst,
unsigned RegNo, uint64_t Address,
const void *Decoder) {
return DecodeRegisterClassByID(Inst, RegNo, AArch64::DTripleRegClassID,
Decoder);
}
static DecodeStatus DecodeQTripleRegisterClass(llvm::MCInst &Inst,
unsigned RegNo, uint64_t Address,
const void *Decoder) {
return DecodeRegisterClassByID(Inst, RegNo, AArch64::QTripleRegClassID,
Decoder);
}
static DecodeStatus DecodeDQuadRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder) {
return DecodeRegisterClassByID(Inst, RegNo, AArch64::DQuadRegClassID,
Decoder);
}
static DecodeStatus DecodeQQuadRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder) {
return DecodeRegisterClassByID(Inst, RegNo, AArch64::QQuadRegClassID,
Decoder);
}
static DecodeStatus DecodeAddrRegExtendOperand(llvm::MCInst &Inst,
unsigned OptionHiS,
uint64_t Address,

View File

@@ -507,3 +507,33 @@ void AArch64InstPrinter::printNeonUImm64MaskOperand(const MCInst *MI,
O << "#0x";
O.write_hex(Mask);
}
// If Count > 1, there are two valid kinds of vector list:
// (1) {Vn.layout, Vn+1.layout, ... , Vm.layout}
// (2) {Vn.layout - Vm.layout}
// We choose the first kind as output.
template <A64Layout::VectorLayout Layout, unsigned Count>
void AArch64InstPrinter::printVectorList(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
assert(Count >= 1 && Count <= 4 && "Invalid Number of Vectors");
unsigned Reg = MI->getOperand(OpNum).getReg();
std::string LayoutStr = A64VectorLayoutToString(Layout);
O << "{";
if (Count > 1) { // Print sub registers separately
bool IsVec64 = (Layout < A64Layout::_16B) ? true : false;
unsigned SubRegIdx = IsVec64 ? AArch64::dsub_0 : AArch64::qsub_0;
for (unsigned I = 0; I < Count; I++) {
std::string Name = getRegisterName(MRI.getSubReg(Reg, SubRegIdx++));
Name[0] = 'v';
O << Name << LayoutStr;
if (I != Count - 1)
O << ", ";
}
} else { // Print the register directly when NumVecs is 1.
std::string Name = getRegisterName(Reg);
Name[0] = 'v';
O << Name << LayoutStr;
}
O << "}";
}

View File

@@ -174,6 +174,9 @@ public:
raw_ostream &O);
void printNeonUImm64MaskOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
template <A64Layout::VectorLayout Layout, unsigned Count>
void printVectorList(const MCInst *MI, unsigned OpNum, raw_ostream &O);
};
}

View File

@@ -306,6 +306,50 @@ namespace A64SE {
};
}
namespace A64Layout {
enum VectorLayout {
Invalid = -1,
_8B,
_4H,
_2S,
_1D,
_16B,
_8H,
_4S,
_2D
};
}
inline static const char *
A64VectorLayoutToString(A64Layout::VectorLayout Layout) {
switch (Layout) {
case A64Layout::_8B: return ".8b";
case A64Layout::_4H: return ".4h";
case A64Layout::_2S: return ".2s";
case A64Layout::_1D: return ".1d";
case A64Layout::_16B: return ".16b";
case A64Layout::_8H: return ".8h";
case A64Layout::_4S: return ".4s";
case A64Layout::_2D: return ".2d";
default: llvm_unreachable("Unknown Vector Layout");
}
}
inline static A64Layout::VectorLayout
A64StringToVectorLayout(StringRef LayoutStr) {
return StringSwitch<A64Layout::VectorLayout>(LayoutStr)
.Case(".8b", A64Layout::_8B)
.Case(".4h", A64Layout::_4H)
.Case(".2s", A64Layout::_2S)
.Case(".1d", A64Layout::_1D)
.Case(".16b", A64Layout::_16B)
.Case(".8h", A64Layout::_8H)
.Case(".4s", A64Layout::_4S)
.Case(".2d", A64Layout::_2D)
.Default(A64Layout::Invalid);
}
namespace A64SysReg {
enum SysRegROValues {
MDCCSR_EL0 = 0x9808, // 10 011 0000 0001 000

File diff suppressed because it is too large

View File

@@ -3880,3 +3880,224 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: frsqrts d8, s22, d18
// CHECK-ERROR: ^
//----------------------------------------------------------------------
// Vector load/store multiple N-element structure (class SIMD lselem)
//----------------------------------------------------------------------
ld1 {x3}, [x2]
ld1 {v4}, [x0]
ld1 {v32.16b}, [x0]
ld1 {v15.8h}, [x32]
// CHECK-ERROR: error: expected vector type register
// CHECK-ERROR: ld1 {x3}, [x2]
// CHECK-ERROR: ^
// CHECK-ERROR: error: expected vector type register
// CHECK-ERROR: ld1 {v4}, [x0]
// CHECK-ERROR: ^
// CHECK-ERROR: error: expected vector type register
// CHECK-ERROR: ld1 {v32.16b}, [x0]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: ld1 {v15.8h}, [x32]
// CHECK-ERROR: ^
ld1 {v0.16b, v2.16b}, [x0]
ld1 {v0.8h, v1.8h, v2.8h, v3.8h, v4.8h}, [x0]
ld1 v0.8b, v1.8b}, [x0]
ld1 {v0.8h-v4.8h}, [x0]
ld1 {v1.8h-v1.8h}, [x0]
ld1 {v15.8h-v17.4h}, [x15]
ld1 {v0.8b-v2.8b, [x0]
// CHECK-ERROR: error: invalid space between two vectors
// CHECK-ERROR: ld1 {v0.16b, v2.16b}, [x0]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid number of vectors
// CHECK-ERROR: ld1 {v0.8h, v1.8h, v2.8h, v3.8h, v4.8h}, [x0]
// CHECK-ERROR: ^
// CHECK-ERROR: error: '{' expected
// CHECK-ERROR: ld1 v0.8b, v1.8b}, [x0]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid number of vectors
// CHECK-ERROR: ld1 {v0.8h-v4.8h}, [x0]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid number of vectors
// CHECK-ERROR: ld1 {v1.8h-v1.8h}, [x0]
// CHECK-ERROR: ^
// CHECK-ERROR: error: expected the same vector layout
// CHECK-ERROR: ld1 {v15.8h-v17.4h}, [x15]
// CHECK-ERROR: ^
// CHECK-ERROR: error: '}' expected
// CHECK-ERROR: ld1 {v0.8b-v2.8b, [x0]
// CHECK-ERROR: ^
ld2 {v15.8h, v16.4h}, [x15]
ld2 {v0.8b, v2.8b}, [x0]
ld2 {v15.4h, v16.4h, v17.4h}, [x32]
ld2 {v15.8h-v16.4h}, [x15]
ld2 {v0.2d-v2.2d}, [x0]
// CHECK-ERROR: error: invalid space between two vectors
// CHECK-ERROR: ld2 {v15.8h, v16.4h}, [x15]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid space between two vectors
// CHECK-ERROR: ld2 {v0.8b, v2.8b}, [x0]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: ld2 {v15.4h, v16.4h, v17.4h}, [x32]
// CHECK-ERROR: ^
// CHECK-ERROR: error: expected the same vector layout
// CHECK-ERROR: ld2 {v15.8h-v16.4h}, [x15]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: ld2 {v0.2d-v2.2d}, [x0]
// CHECK-ERROR: ^
ld3 {v15.8h, v16.8h, v17.4h}, [x15]
ld3 {v0.8b, v1,8b, v2.8b, v3.8b}, [x0]
ld3 {v0.8b, v2.8b, v3.8b}, [x0]
ld3 {v15.8h-v17.4h}, [x15]
ld3 {v31.4s-v2.4s}, [sp]
// CHECK-ERROR: error: invalid space between two vectors
// CHECK-ERROR: ld3 {v15.8h, v16.8h, v17.4h}, [x15]
// CHECK-ERROR: ^
// CHECK-ERROR: error: expected vector type register
// CHECK-ERROR: ld3 {v0.8b, v1,8b, v2.8b, v3.8b}, [x0]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid space between two vectors
// CHECK-ERROR: ld3 {v0.8b, v2.8b, v3.8b}, [x0]
// CHECK-ERROR: ^
// CHECK-ERROR: error: expected the same vector layout
// CHECK-ERROR: ld3 {v15.8h-v17.4h}, [x15]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: ld3 {v31.4s-v2.4s}, [sp]
// CHECK-ERROR: ^
ld4 {v15.8h, v16.8h, v17.4h, v18.8h}, [x15]
ld4 {v0.8b, v2.8b, v3.8b, v4.8b}, [x0]
ld4 {v15.4h, v16.4h, v17.4h, v18.4h, v19.4h}, [x31]
ld4 {v15.8h-v18.4h}, [x15]
ld4 {v31.2s-v1.2s}, [x31]
// CHECK-ERROR: error: invalid space between two vectors
// CHECK-ERROR: ld4 {v15.8h, v16.8h, v17.4h, v18.8h}, [x15]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid space between two vectors
// CHECK-ERROR: ld4 {v0.8b, v2.8b, v3.8b, v4.8b}, [x0]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid number of vectors
// CHECK-ERROR: ld4 {v15.4h, v16.4h, v17.4h, v18.4h, v19.4h}, [x31]
// CHECK-ERROR: ^
// CHECK-ERROR: error: expected the same vector layout
// CHECK-ERROR: ld4 {v15.8h-v18.4h}, [x15]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: ld4 {v31.2s-v1.2s}, [x31]
// CHECK-ERROR: ^
st1 {x3}, [x2]
st1 {v4}, [x0]
st1 {v32.16b}, [x0]
st1 {v15.8h}, [x32]
// CHECK-ERROR: error: expected vector type register
// CHECK-ERROR: st1 {x3}, [x2]
// CHECK-ERROR: ^
// CHECK-ERROR: error: expected vector type register
// CHECK-ERROR: st1 {v4}, [x0]
// CHECK-ERROR: ^
// CHECK-ERROR: error: expected vector type register
// CHECK-ERROR: st1 {v32.16b}, [x0]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: st1 {v15.8h}, [x32]
// CHECK-ERROR: ^
st1 {v0.16b, v2.16b}, [x0]
st1 {v0.8h, v1.8h, v2.8h, v3.8h, v4.8h}, [x0]
st1 v0.8b, v1.8b}, [x0]
st1 {v0.8h-v4.8h}, [x0]
st1 {v1.8h-v1.8h}, [x0]
st1 {v15.8h-v17.4h}, [x15]
st1 {v0.8b-v2.8b, [x0]
// CHECK-ERROR: error: invalid space between two vectors
// CHECK-ERROR: st1 {v0.16b, v2.16b}, [x0]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid number of vectors
// CHECK-ERROR: st1 {v0.8h, v1.8h, v2.8h, v3.8h, v4.8h}, [x0]
// CHECK-ERROR: ^
// CHECK-ERROR: error: '{' expected
// CHECK-ERROR: st1 v0.8b, v1.8b}, [x0]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid number of vectors
// CHECK-ERROR: st1 {v0.8h-v4.8h}, [x0]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid number of vectors
// CHECK-ERROR: st1 {v1.8h-v1.8h}, [x0]
// CHECK-ERROR: ^
// CHECK-ERROR: error: expected the same vector layout
// CHECK-ERROR: st1 {v15.8h-v17.4h}, [x15]
// CHECK-ERROR: ^
// CHECK-ERROR: error: '}' expected
// CHECK-ERROR: st1 {v0.8b-v2.8b, [x0]
// CHECK-ERROR: ^
st2 {v15.8h, v16.4h}, [x15]
st2 {v0.8b, v2.8b}, [x0]
st2 {v15.4h, v16.4h, v17.4h}, [x30]
st2 {v15.8h-v16.4h}, [x15]
st2 {v0.2d-v2.2d}, [x0]
// CHECK-ERROR: error: invalid space between two vectors
// CHECK-ERROR: st2 {v15.8h, v16.4h}, [x15]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid space between two vectors
// CHECK-ERROR: st2 {v0.8b, v2.8b}, [x0]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: st2 {v15.4h, v16.4h, v17.4h}, [x30]
// CHECK-ERROR: ^
// CHECK-ERROR: error: expected the same vector layout
// CHECK-ERROR: st2 {v15.8h-v16.4h}, [x15]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: st2 {v0.2d-v2.2d}, [x0]
// CHECK-ERROR: ^
st3 {v15.8h, v16.8h, v17.4h}, [x15]
st3 {v0.8b, v1,8b, v2.8b, v3.8b}, [x0]
st3 {v0.8b, v2.8b, v3.8b}, [x0]
st3 {v15.8h-v17.4h}, [x15]
st3 {v31.4s-v2.4s}, [sp]
// CHECK-ERROR: error: invalid space between two vectors
// CHECK-ERROR: st3 {v15.8h, v16.8h, v17.4h}, [x15]
// CHECK-ERROR: ^
// CHECK-ERROR: error: expected vector type register
// CHECK-ERROR: st3 {v0.8b, v1,8b, v2.8b, v3.8b}, [x0]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid space between two vectors
// CHECK-ERROR: st3 {v0.8b, v2.8b, v3.8b}, [x0]
// CHECK-ERROR: ^
// CHECK-ERROR: error: expected the same vector layout
// CHECK-ERROR: st3 {v15.8h-v17.4h}, [x15]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: st3 {v31.4s-v2.4s}, [sp]
// CHECK-ERROR: ^
st4 {v15.8h, v16.8h, v17.4h, v18.8h}, [x15]
st4 {v0.8b, v2.8b, v3.8b, v4.8b}, [x0]
st4 {v15.4h, v16.4h, v17.4h, v18.4h, v19.4h}, [x31]
st4 {v15.8h-v18.4h}, [x15]
st4 {v31.2s-v1.2s}, [x31]
// CHECK-ERROR: error: invalid space between two vectors
// CHECK-ERROR: st4 {v15.8h, v16.8h, v17.4h, v18.8h}, [x15]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid space between two vectors
// CHECK-ERROR: st4 {v0.8b, v2.8b, v3.8b, v4.8b}, [x0]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid number of vectors
// CHECK-ERROR: st4 {v15.4h, v16.4h, v17.4h, v18.4h, v19.4h}, [x31]
// CHECK-ERROR: ^
// CHECK-ERROR: error: expected the same vector layout
// CHECK-ERROR: st4 {v15.8h-v18.4h}, [x15]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: st4 {v31.2s-v1.2s}, [x31]
// CHECK-ERROR: ^

View File

@@ -0,0 +1,463 @@
// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
//------------------------------------------------------------------------------
// Store multiple 1-element structures from one register
//------------------------------------------------------------------------------
st1 {v0.16b}, [x0]
st1 {v15.8h}, [x15]
st1 {v31.4s}, [sp]
st1 {v0.2d}, [x0]
st1 {v0.8b}, [x0]
st1 {v15.4h}, [x15]
st1 {v31.2s}, [sp]
st1 {v0.1d}, [x0]
// CHECK: st1 {v0.16b}, [x0] // encoding: [0x00,0x70,0x00,0x4c]
// CHECK: st1 {v15.8h}, [x15] // encoding: [0xef,0x75,0x00,0x4c]
// CHECK: st1 {v31.4s}, [sp] // encoding: [0xff,0x7b,0x00,0x4c]
// CHECK: st1 {v0.2d}, [x0] // encoding: [0x00,0x7c,0x00,0x4c]
// CHECK: st1 {v0.8b}, [x0] // encoding: [0x00,0x70,0x00,0x0c]
// CHECK: st1 {v15.4h}, [x15] // encoding: [0xef,0x75,0x00,0x0c]
// CHECK: st1 {v31.2s}, [sp] // encoding: [0xff,0x7b,0x00,0x0c]
// CHECK: st1 {v0.1d}, [x0] // encoding: [0x00,0x7c,0x00,0x0c]
//------------------------------------------------------------------------------
// Store multiple 1-element structures from two consecutive registers
//------------------------------------------------------------------------------
st1 {v0.16b, v1.16b}, [x0]
st1 {v15.8h, v16.8h}, [x15]
st1 {v31.4s, v0.4s}, [sp]
st1 {v0.2d, v1.2d}, [x0]
st1 {v0.8b, v1.8b}, [x0]
st1 {v15.4h, v16.4h}, [x15]
st1 {v31.2s, v0.2s}, [sp]
st1 {v0.1d, v1.1d}, [x0]
// CHECK: st1 {v0.16b, v1.16b}, [x0] // encoding: [0x00,0xa0,0x00,0x4c]
// CHECK: st1 {v15.8h, v16.8h}, [x15] // encoding: [0xef,0xa5,0x00,0x4c]
// CHECK: st1 {v31.4s, v0.4s}, [sp] // encoding: [0xff,0xab,0x00,0x4c]
// CHECK: st1 {v0.2d, v1.2d}, [x0] // encoding: [0x00,0xac,0x00,0x4c]
// CHECK: st1 {v0.8b, v1.8b}, [x0] // encoding: [0x00,0xa0,0x00,0x0c]
// CHECK: st1 {v15.4h, v16.4h}, [x15] // encoding: [0xef,0xa5,0x00,0x0c]
// CHECK: st1 {v31.2s, v0.2s}, [sp] // encoding: [0xff,0xab,0x00,0x0c]
// CHECK: st1 {v0.1d, v1.1d}, [x0] // encoding: [0x00,0xac,0x00,0x0c]
st1 {v0.16b-v1.16b}, [x0]
st1 {v15.8h-v16.8h}, [x15]
st1 {v31.4s-v0.4s}, [sp]
st1 {v0.2d-v1.2d}, [x0]
st1 {v0.8b-v1.8b}, [x0]
st1 {v15.4h-v16.4h}, [x15]
st1 {v31.2s-v0.2s}, [sp]
st1 {v0.1d-v1.1d}, [x0]
// CHECK: st1 {v0.16b, v1.16b}, [x0] // encoding: [0x00,0xa0,0x00,0x4c]
// CHECK: st1 {v15.8h, v16.8h}, [x15] // encoding: [0xef,0xa5,0x00,0x4c]
// CHECK: st1 {v31.4s, v0.4s}, [sp] // encoding: [0xff,0xab,0x00,0x4c]
// CHECK: st1 {v0.2d, v1.2d}, [x0] // encoding: [0x00,0xac,0x00,0x4c]
// CHECK: st1 {v0.8b, v1.8b}, [x0] // encoding: [0x00,0xa0,0x00,0x0c]
// CHECK: st1 {v15.4h, v16.4h}, [x15] // encoding: [0xef,0xa5,0x00,0x0c]
// CHECK: st1 {v31.2s, v0.2s}, [sp] // encoding: [0xff,0xab,0x00,0x0c]
// CHECK: st1 {v0.1d, v1.1d}, [x0] // encoding: [0x00,0xac,0x00,0x0c]
//------------------------------------------------------------------------------
// Store multiple 1-element structures from three consecutive registers
//------------------------------------------------------------------------------
st1 {v0.16b, v1.16b, v2.16b}, [x0]
st1 {v15.8h, v16.8h, v17.8h}, [x15]
st1 {v31.4s, v0.4s, v1.4s}, [sp]
st1 {v0.2d, v1.2d, v2.2d}, [x0]
st1 {v0.8b, v1.8b, v2.8b}, [x0]
st1 {v15.4h, v16.4h, v17.4h}, [x15]
st1 {v31.2s, v0.2s, v1.2s}, [sp]
st1 {v0.1d, v1.1d, v2.1d}, [x0]
// CHECK: st1 {v0.16b, v1.16b, v2.16b}, [x0] // encoding: [0x00,0x60,0x00,0x4c]
// CHECK: st1 {v15.8h, v16.8h, v17.8h}, [x15] // encoding: [0xef,0x65,0x00,0x4c]
// CHECK: st1 {v31.4s, v0.4s, v1.4s}, [sp] // encoding: [0xff,0x6b,0x00,0x4c]
// CHECK: st1 {v0.2d, v1.2d, v2.2d}, [x0] // encoding: [0x00,0x6c,0x00,0x4c]
// CHECK: st1 {v0.8b, v1.8b, v2.8b}, [x0] // encoding: [0x00,0x60,0x00,0x0c]
// CHECK: st1 {v15.4h, v16.4h, v17.4h}, [x15] // encoding: [0xef,0x65,0x00,0x0c]
// CHECK: st1 {v31.2s, v0.2s, v1.2s}, [sp] // encoding: [0xff,0x6b,0x00,0x0c]
// CHECK: st1 {v0.1d, v1.1d, v2.1d}, [x0] // encoding: [0x00,0x6c,0x00,0x0c]
st1 {v0.16b-v2.16b}, [x0]
st1 {v15.8h-v17.8h}, [x15]
st1 {v31.4s-v1.4s}, [sp]
st1 {v0.2d-v2.2d}, [x0]
st1 {v0.8b-v2.8b}, [x0]
st1 {v15.4h-v17.4h}, [x15]
st1 {v31.2s-v1.2s}, [sp]
st1 {v0.1d-v2.1d}, [x0]
// CHECK: st1 {v0.16b, v1.16b, v2.16b}, [x0] // encoding: [0x00,0x60,0x00,0x4c]
// CHECK: st1 {v15.8h, v16.8h, v17.8h}, [x15] // encoding: [0xef,0x65,0x00,0x4c]
// CHECK: st1 {v31.4s, v0.4s, v1.4s}, [sp] // encoding: [0xff,0x6b,0x00,0x4c]
// CHECK: st1 {v0.2d, v1.2d, v2.2d}, [x0] // encoding: [0x00,0x6c,0x00,0x4c]
// CHECK: st1 {v0.8b, v1.8b, v2.8b}, [x0] // encoding: [0x00,0x60,0x00,0x0c]
// CHECK: st1 {v15.4h, v16.4h, v17.4h}, [x15] // encoding: [0xef,0x65,0x00,0x0c]
// CHECK: st1 {v31.2s, v0.2s, v1.2s}, [sp] // encoding: [0xff,0x6b,0x00,0x0c]
// CHECK: st1 {v0.1d, v1.1d, v2.1d}, [x0] // encoding: [0x00,0x6c,0x00,0x0c]
//------------------------------------------------------------------------------
// Store multiple 1-element structures from four consecutive registers
//------------------------------------------------------------------------------
st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0]
st1 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15]
st1 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp]
st1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0]
st1 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0]
st1 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15]
st1 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp]
st1 {v0.1d, v1.1d, v2.1d, v3.1d}, [x0]
// CHECK: st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0] // encoding: [0x00,0x20,0x00,0x4c]
// CHECK: st1 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15] // encoding: [0xef,0x25,0x00,0x4c]
// CHECK: st1 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp] // encoding: [0xff,0x2b,0x00,0x4c]
// CHECK: st1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0] // encoding: [0x00,0x2c,0x00,0x4c]
// CHECK: st1 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0] // encoding: [0x00,0x20,0x00,0x0c]
// CHECK: st1 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15] // encoding: [0xef,0x25,0x00,0x0c]
// CHECK: st1 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp] // encoding: [0xff,0x2b,0x00,0x0c]
// CHECK: st1 {v0.1d, v1.1d, v2.1d, v3.1d}, [x0] // encoding: [0x00,0x2c,0x00,0x0c]
st1 {v0.16b-v3.16b}, [x0]
st1 {v15.8h-v18.8h}, [x15]
st1 {v31.4s-v2.4s}, [sp]
st1 {v0.2d-v3.2d}, [x0]
st1 {v0.8b-v3.8b}, [x0]
st1 {v15.4h-v18.4h}, [x15]
st1 {v31.2s-v2.2s}, [sp]
st1 {v0.1d-v3.1d}, [x0]
// CHECK: st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0] // encoding: [0x00,0x20,0x00,0x4c]
// CHECK: st1 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15] // encoding: [0xef,0x25,0x00,0x4c]
// CHECK: st1 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp] // encoding: [0xff,0x2b,0x00,0x4c]
// CHECK: st1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0] // encoding: [0x00,0x2c,0x00,0x4c]
// CHECK: st1 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0] // encoding: [0x00,0x20,0x00,0x0c]
// CHECK: st1 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15] // encoding: [0xef,0x25,0x00,0x0c]
// CHECK: st1 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp] // encoding: [0xff,0x2b,0x00,0x0c]
// CHECK: st1 {v0.1d, v1.1d, v2.1d, v3.1d}, [x0] // encoding: [0x00,0x2c,0x00,0x0c]
//------------------------------------------------------------------------------
// Store multiple 2-element structures from two consecutive registers
//------------------------------------------------------------------------------
st2 {v0.16b, v1.16b}, [x0]
st2 {v15.8h, v16.8h}, [x15]
st2 {v31.4s, v0.4s}, [sp]
st2 {v0.2d, v1.2d}, [x0]
st2 {v0.8b, v1.8b}, [x0]
st2 {v15.4h, v16.4h}, [x15]
st2 {v31.2s, v0.2s}, [sp]
// CHECK: st2 {v0.16b, v1.16b}, [x0] // encoding: [0x00,0x80,0x00,0x4c]
// CHECK: st2 {v15.8h, v16.8h}, [x15] // encoding: [0xef,0x85,0x00,0x4c]
// CHECK: st2 {v31.4s, v0.4s}, [sp] // encoding: [0xff,0x8b,0x00,0x4c]
// CHECK: st2 {v0.2d, v1.2d}, [x0] // encoding: [0x00,0x8c,0x00,0x4c]
// CHECK: st2 {v0.8b, v1.8b}, [x0] // encoding: [0x00,0x80,0x00,0x0c]
// CHECK: st2 {v15.4h, v16.4h}, [x15] // encoding: [0xef,0x85,0x00,0x0c]
// CHECK: st2 {v31.2s, v0.2s}, [sp] // encoding: [0xff,0x8b,0x00,0x0c]
st2 {v0.16b-v1.16b}, [x0]
st2 {v15.8h-v16.8h}, [x15]
st2 {v31.4s-v0.4s}, [sp]
st2 {v0.2d-v1.2d}, [x0]
st2 {v0.8b-v1.8b}, [x0]
st2 {v15.4h-v16.4h}, [x15]
st2 {v31.2s-v0.2s}, [sp]
// CHECK: st2 {v0.16b, v1.16b}, [x0] // encoding: [0x00,0x80,0x00,0x4c]
// CHECK: st2 {v15.8h, v16.8h}, [x15] // encoding: [0xef,0x85,0x00,0x4c]
// CHECK: st2 {v31.4s, v0.4s}, [sp] // encoding: [0xff,0x8b,0x00,0x4c]
// CHECK: st2 {v0.2d, v1.2d}, [x0] // encoding: [0x00,0x8c,0x00,0x4c]
// CHECK: st2 {v0.8b, v1.8b}, [x0] // encoding: [0x00,0x80,0x00,0x0c]
// CHECK: st2 {v15.4h, v16.4h}, [x15] // encoding: [0xef,0x85,0x00,0x0c]
// CHECK: st2 {v31.2s, v0.2s}, [sp] // encoding: [0xff,0x8b,0x00,0x0c]
//------------------------------------------------------------------------------
// Store multiple 3-element structures from three consecutive registers
//------------------------------------------------------------------------------
st3 {v0.16b, v1.16b, v2.16b}, [x0]
st3 {v15.8h, v16.8h, v17.8h}, [x15]
st3 {v31.4s, v0.4s, v1.4s}, [sp]
st3 {v0.2d, v1.2d, v2.2d}, [x0]
st3 {v0.8b, v1.8b, v2.8b}, [x0]
st3 {v15.4h, v16.4h, v17.4h}, [x15]
st3 {v31.2s, v0.2s, v1.2s}, [sp]
// CHECK: st3 {v0.16b, v1.16b, v2.16b}, [x0] // encoding: [0x00,0x40,0x00,0x4c]
// CHECK: st3 {v15.8h, v16.8h, v17.8h}, [x15] // encoding: [0xef,0x45,0x00,0x4c]
// CHECK: st3 {v31.4s, v0.4s, v1.4s}, [sp] // encoding: [0xff,0x4b,0x00,0x4c]
// CHECK: st3 {v0.2d, v1.2d, v2.2d}, [x0] // encoding: [0x00,0x4c,0x00,0x4c]
// CHECK: st3 {v0.8b, v1.8b, v2.8b}, [x0] // encoding: [0x00,0x40,0x00,0x0c]
// CHECK: st3 {v15.4h, v16.4h, v17.4h}, [x15] // encoding: [0xef,0x45,0x00,0x0c]
// CHECK: st3 {v31.2s, v0.2s, v1.2s}, [sp] // encoding: [0xff,0x4b,0x00,0x0c]
st3 {v0.16b-v2.16b}, [x0]
st3 {v15.8h-v17.8h}, [x15]
st3 {v31.4s-v1.4s}, [sp]
st3 {v0.2d-v2.2d}, [x0]
st3 {v0.8b-v2.8b}, [x0]
st3 {v15.4h-v17.4h}, [x15]
st3 {v31.2s-v1.2s}, [sp]
// CHECK: st3 {v0.16b, v1.16b, v2.16b}, [x0] // encoding: [0x00,0x40,0x00,0x4c]
// CHECK: st3 {v15.8h, v16.8h, v17.8h}, [x15] // encoding: [0xef,0x45,0x00,0x4c]
// CHECK: st3 {v31.4s, v0.4s, v1.4s}, [sp] // encoding: [0xff,0x4b,0x00,0x4c]
// CHECK: st3 {v0.2d, v1.2d, v2.2d}, [x0] // encoding: [0x00,0x4c,0x00,0x4c]
// CHECK: st3 {v0.8b, v1.8b, v2.8b}, [x0] // encoding: [0x00,0x40,0x00,0x0c]
// CHECK: st3 {v15.4h, v16.4h, v17.4h}, [x15] // encoding: [0xef,0x45,0x00,0x0c]
// CHECK: st3 {v31.2s, v0.2s, v1.2s}, [sp] // encoding: [0xff,0x4b,0x00,0x0c]
//------------------------------------------------------------------------------
// Store multiple 4-element structures from four consecutive registers
//------------------------------------------------------------------------------
st4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0]
st4 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15]
st4 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp]
st4 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0]
st4 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0]
st4 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15]
st4 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp]
// CHECK: st4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0] // encoding: [0x00,0x00,0x00,0x4c]
// CHECK: st4 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15] // encoding: [0xef,0x05,0x00,0x4c]
// CHECK: st4 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp] // encoding: [0xff,0x0b,0x00,0x4c]
// CHECK: st4 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0] // encoding: [0x00,0x0c,0x00,0x4c]
// CHECK: st4 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0] // encoding: [0x00,0x00,0x00,0x0c]
// CHECK: st4 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15] // encoding: [0xef,0x05,0x00,0x0c]
// CHECK: st4 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp] // encoding: [0xff,0x0b,0x00,0x0c]
st4 {v0.16b-v3.16b}, [x0]
st4 {v15.8h-v18.8h}, [x15]
st4 {v31.4s-v2.4s}, [sp]
st4 {v0.2d-v3.2d}, [x0]
st4 {v0.8b-v3.8b}, [x0]
st4 {v15.4h-v18.4h}, [x15]
st4 {v31.2s-v2.2s}, [sp]
// CHECK: st4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0] // encoding: [0x00,0x00,0x00,0x4c]
// CHECK: st4 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15] // encoding: [0xef,0x05,0x00,0x4c]
// CHECK: st4 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp] // encoding: [0xff,0x0b,0x00,0x4c]
// CHECK: st4 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0] // encoding: [0x00,0x0c,0x00,0x4c]
// CHECK: st4 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0] // encoding: [0x00,0x00,0x00,0x0c]
// CHECK: st4 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15] // encoding: [0xef,0x05,0x00,0x0c]
// CHECK: st4 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp] // encoding: [0xff,0x0b,0x00,0x0c]
//------------------------------------------------------------------------------
// Load multiple 1-element structures to one register
//------------------------------------------------------------------------------
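// Single-register ld1 uses opcode 0b0111 with L (bit 22) set; unlike
// ld2/ld3/ld4 it also accepts the 1d arrangement.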
ld1 {v0.16b}, [x0]
ld1 {v15.8h}, [x15]
ld1 {v31.4s}, [sp]
ld1 {v0.2d}, [x0]
ld1 {v0.8b}, [x0]
ld1 {v15.4h}, [x15]
ld1 {v31.2s}, [sp]
ld1 {v0.1d}, [x0]
// CHECK: ld1 {v0.16b}, [x0] // encoding: [0x00,0x70,0x40,0x4c]
// CHECK: ld1 {v15.8h}, [x15] // encoding: [0xef,0x75,0x40,0x4c]
// CHECK: ld1 {v31.4s}, [sp] // encoding: [0xff,0x7b,0x40,0x4c]
// CHECK: ld1 {v0.2d}, [x0] // encoding: [0x00,0x7c,0x40,0x4c]
// CHECK: ld1 {v0.8b}, [x0] // encoding: [0x00,0x70,0x40,0x0c]
// CHECK: ld1 {v15.4h}, [x15] // encoding: [0xef,0x75,0x40,0x0c]
// CHECK: ld1 {v31.2s}, [sp] // encoding: [0xff,0x7b,0x40,0x0c]
// CHECK: ld1 {v0.1d}, [x0] // encoding: [0x00,0x7c,0x40,0x0c]
//------------------------------------------------------------------------------
// Load multiple 1-element structures to two consecutive registers
//------------------------------------------------------------------------------
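// Two-register ld1 uses opcode 0b1010; it loads contiguous elements rather
// than de-interleaved pairs, so the 1d arrangement is still legal.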
ld1 {v0.16b, v1.16b}, [x0]
ld1 {v15.8h, v16.8h}, [x15]
ld1 {v31.4s, v0.4s}, [sp]
ld1 {v0.2d, v1.2d}, [x0]
ld1 {v0.8b, v1.8b}, [x0]
ld1 {v15.4h, v16.4h}, [x15]
ld1 {v31.2s, v0.2s}, [sp]
ld1 {v0.1d, v1.1d}, [x0]
// CHECK: ld1 {v0.16b, v1.16b}, [x0] // encoding: [0x00,0xa0,0x40,0x4c]
// CHECK: ld1 {v15.8h, v16.8h}, [x15] // encoding: [0xef,0xa5,0x40,0x4c]
// CHECK: ld1 {v31.4s, v0.4s}, [sp] // encoding: [0xff,0xab,0x40,0x4c]
// CHECK: ld1 {v0.2d, v1.2d}, [x0] // encoding: [0x00,0xac,0x40,0x4c]
// CHECK: ld1 {v0.8b, v1.8b}, [x0] // encoding: [0x00,0xa0,0x40,0x0c]
// CHECK: ld1 {v15.4h, v16.4h}, [x15] // encoding: [0xef,0xa5,0x40,0x0c]
// CHECK: ld1 {v31.2s, v0.2s}, [sp] // encoding: [0xff,0xab,0x40,0x0c]
// CHECK: ld1 {v0.1d, v1.1d}, [x0] // encoding: [0x00,0xac,0x40,0x0c]
ld1 {v0.16b-v1.16b}, [x0]
ld1 {v15.8h-v16.8h}, [x15]
ld1 {v31.4s-v0.4s}, [sp]
ld1 {v0.2d-v1.2d}, [x0]
ld1 {v0.8b-v1.8b}, [x0]
ld1 {v15.4h-v16.4h}, [x15]
ld1 {v31.2s-v0.2s}, [sp]
ld1 {v0.1d-v1.1d}, [x0]
// CHECK: ld1 {v0.16b, v1.16b}, [x0] // encoding: [0x00,0xa0,0x40,0x4c]
// CHECK: ld1 {v15.8h, v16.8h}, [x15] // encoding: [0xef,0xa5,0x40,0x4c]
// CHECK: ld1 {v31.4s, v0.4s}, [sp] // encoding: [0xff,0xab,0x40,0x4c]
// CHECK: ld1 {v0.2d, v1.2d}, [x0] // encoding: [0x00,0xac,0x40,0x4c]
// CHECK: ld1 {v0.8b, v1.8b}, [x0] // encoding: [0x00,0xa0,0x40,0x0c]
// CHECK: ld1 {v15.4h, v16.4h}, [x15] // encoding: [0xef,0xa5,0x40,0x0c]
// CHECK: ld1 {v31.2s, v0.2s}, [sp] // encoding: [0xff,0xab,0x40,0x0c]
// CHECK: ld1 {v0.1d, v1.1d}, [x0] // encoding: [0x00,0xac,0x40,0x0c]
//------------------------------------------------------------------------------
// Load multiple 1-element structures to three consecutive registers
//------------------------------------------------------------------------------
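// Three-register ld1 uses opcode 0b0110 (compare 0b0100 for ld3 below).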
ld1 {v0.16b, v1.16b, v2.16b}, [x0]
ld1 {v15.8h, v16.8h, v17.8h}, [x15]
ld1 {v31.4s, v0.4s, v1.4s}, [sp]
ld1 {v0.2d, v1.2d, v2.2d}, [x0]
ld1 {v0.8b, v1.8b, v2.8b}, [x0]
ld1 {v15.4h, v16.4h, v17.4h}, [x15]
ld1 {v31.2s, v0.2s, v1.2s}, [sp]
ld1 {v0.1d, v1.1d, v2.1d}, [x0]
// CHECK: ld1 {v0.16b, v1.16b, v2.16b}, [x0] // encoding: [0x00,0x60,0x40,0x4c]
// CHECK: ld1 {v15.8h, v16.8h, v17.8h}, [x15] // encoding: [0xef,0x65,0x40,0x4c]
// CHECK: ld1 {v31.4s, v0.4s, v1.4s}, [sp] // encoding: [0xff,0x6b,0x40,0x4c]
// CHECK: ld1 {v0.2d, v1.2d, v2.2d}, [x0] // encoding: [0x00,0x6c,0x40,0x4c]
// CHECK: ld1 {v0.8b, v1.8b, v2.8b}, [x0] // encoding: [0x00,0x60,0x40,0x0c]
// CHECK: ld1 {v15.4h, v16.4h, v17.4h}, [x15] // encoding: [0xef,0x65,0x40,0x0c]
// CHECK: ld1 {v31.2s, v0.2s, v1.2s}, [sp] // encoding: [0xff,0x6b,0x40,0x0c]
// CHECK: ld1 {v0.1d, v1.1d, v2.1d}, [x0] // encoding: [0x00,0x6c,0x40,0x0c]
ld1 {v0.16b-v2.16b}, [x0]
ld1 {v15.8h-v17.8h}, [x15]
ld1 {v31.4s-v1.4s}, [sp]
ld1 {v0.2d-v2.2d}, [x0]
ld1 {v0.8b-v2.8b}, [x0]
ld1 {v15.4h-v17.4h}, [x15]
ld1 {v31.2s-v1.2s}, [sp]
ld1 {v0.1d-v2.1d}, [x0]
// CHECK: ld1 {v0.16b, v1.16b, v2.16b}, [x0] // encoding: [0x00,0x60,0x40,0x4c]
// CHECK: ld1 {v15.8h, v16.8h, v17.8h}, [x15] // encoding: [0xef,0x65,0x40,0x4c]
// CHECK: ld1 {v31.4s, v0.4s, v1.4s}, [sp] // encoding: [0xff,0x6b,0x40,0x4c]
// CHECK: ld1 {v0.2d, v1.2d, v2.2d}, [x0] // encoding: [0x00,0x6c,0x40,0x4c]
// CHECK: ld1 {v0.8b, v1.8b, v2.8b}, [x0] // encoding: [0x00,0x60,0x40,0x0c]
// CHECK: ld1 {v15.4h, v16.4h, v17.4h}, [x15] // encoding: [0xef,0x65,0x40,0x0c]
// CHECK: ld1 {v31.2s, v0.2s, v1.2s}, [sp] // encoding: [0xff,0x6b,0x40,0x0c]
// CHECK: ld1 {v0.1d, v1.1d, v2.1d}, [x0] // encoding: [0x00,0x6c,0x40,0x0c]
//------------------------------------------------------------------------------
// Load multiple 1-element structures to four consecutive registers
//------------------------------------------------------------------------------
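// Four-register ld1 uses opcode 0b0010 (compare 0b0000 for ld4 below).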
ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0]
ld1 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15]
ld1 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp]
ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0]
ld1 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0]
ld1 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15]
ld1 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp]
ld1 {v0.1d, v1.1d, v2.1d, v3.1d}, [x0]
// CHECK: ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0] // encoding: [0x00,0x20,0x40,0x4c]
// CHECK: ld1 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15] // encoding: [0xef,0x25,0x40,0x4c]
// CHECK: ld1 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp] // encoding: [0xff,0x2b,0x40,0x4c]
// CHECK: ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0] // encoding: [0x00,0x2c,0x40,0x4c]
// CHECK: ld1 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0] // encoding: [0x00,0x20,0x40,0x0c]
// CHECK: ld1 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15] // encoding: [0xef,0x25,0x40,0x0c]
// CHECK: ld1 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp] // encoding: [0xff,0x2b,0x40,0x0c]
// CHECK: ld1 {v0.1d, v1.1d, v2.1d, v3.1d}, [x0] // encoding: [0x00,0x2c,0x40,0x0c]
ld1 {v0.16b-v3.16b}, [x0]
ld1 {v15.8h-v18.8h}, [x15]
ld1 {v31.4s-v2.4s}, [sp]
ld1 {v0.2d-v3.2d}, [x0]
ld1 {v0.8b-v3.8b}, [x0]
ld1 {v15.4h-v18.4h}, [x15]
ld1 {v31.2s-v2.2s}, [sp]
ld1 {v0.1d-v3.1d}, [x0]
// CHECK: ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0] // encoding: [0x00,0x20,0x40,0x4c]
// CHECK: ld1 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15] // encoding: [0xef,0x25,0x40,0x4c]
// CHECK: ld1 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp] // encoding: [0xff,0x2b,0x40,0x4c]
// CHECK: ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0] // encoding: [0x00,0x2c,0x40,0x4c]
// CHECK: ld1 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0] // encoding: [0x00,0x20,0x40,0x0c]
// CHECK: ld1 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15] // encoding: [0xef,0x25,0x40,0x0c]
// CHECK: ld1 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp] // encoding: [0xff,0x2b,0x40,0x0c]
// CHECK: ld1 {v0.1d, v1.1d, v2.1d, v3.1d}, [x0] // encoding: [0x00,0x2c,0x40,0x0c]
//------------------------------------------------------------------------------
// Load multiple 2-element structures to two consecutive registers
//------------------------------------------------------------------------------
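// ld2 shares opcode 0b1000 with st2; the encodings differ from the st2 group
// above only in L (bit 22), visible as 0x40 rather than 0x00 in the third
// encoding byte.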
ld2 {v0.16b, v1.16b}, [x0]
ld2 {v15.8h, v16.8h}, [x15]
ld2 {v31.4s, v0.4s}, [sp]
ld2 {v0.2d, v1.2d}, [x0]
ld2 {v0.8b, v1.8b}, [x0]
ld2 {v15.4h, v16.4h}, [x15]
ld2 {v31.2s, v0.2s}, [sp]
// CHECK: ld2 {v0.16b, v1.16b}, [x0] // encoding: [0x00,0x80,0x40,0x4c]
// CHECK: ld2 {v15.8h, v16.8h}, [x15] // encoding: [0xef,0x85,0x40,0x4c]
// CHECK: ld2 {v31.4s, v0.4s}, [sp] // encoding: [0xff,0x8b,0x40,0x4c]
// CHECK: ld2 {v0.2d, v1.2d}, [x0] // encoding: [0x00,0x8c,0x40,0x4c]
// CHECK: ld2 {v0.8b, v1.8b}, [x0] // encoding: [0x00,0x80,0x40,0x0c]
// CHECK: ld2 {v15.4h, v16.4h}, [x15] // encoding: [0xef,0x85,0x40,0x0c]
// CHECK: ld2 {v31.2s, v0.2s}, [sp] // encoding: [0xff,0x8b,0x40,0x0c]
ld2 {v0.16b-v1.16b}, [x0]
ld2 {v15.8h-v16.8h}, [x15]
ld2 {v31.4s-v0.4s}, [sp]
ld2 {v0.2d-v1.2d}, [x0]
ld2 {v0.8b-v1.8b}, [x0]
ld2 {v15.4h-v16.4h}, [x15]
ld2 {v31.2s-v0.2s}, [sp]
// CHECK: ld2 {v0.16b, v1.16b}, [x0] // encoding: [0x00,0x80,0x40,0x4c]
// CHECK: ld2 {v15.8h, v16.8h}, [x15] // encoding: [0xef,0x85,0x40,0x4c]
// CHECK: ld2 {v31.4s, v0.4s}, [sp] // encoding: [0xff,0x8b,0x40,0x4c]
// CHECK: ld2 {v0.2d, v1.2d}, [x0] // encoding: [0x00,0x8c,0x40,0x4c]
// CHECK: ld2 {v0.8b, v1.8b}, [x0] // encoding: [0x00,0x80,0x40,0x0c]
// CHECK: ld2 {v15.4h, v16.4h}, [x15] // encoding: [0xef,0x85,0x40,0x0c]
// CHECK: ld2 {v31.2s, v0.2s}, [sp] // encoding: [0xff,0x8b,0x40,0x0c]
//------------------------------------------------------------------------------
// Load multiple 3-element structures to three consecutive registers
//------------------------------------------------------------------------------
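// ld3 uses opcode 0b0100 with L set, de-interleaving 3-element structures
// across the three listed registers.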
ld3 {v0.16b, v1.16b, v2.16b}, [x0]
ld3 {v15.8h, v16.8h, v17.8h}, [x15]
ld3 {v31.4s, v0.4s, v1.4s}, [sp]
ld3 {v0.2d, v1.2d, v2.2d}, [x0]
ld3 {v0.8b, v1.8b, v2.8b}, [x0]
ld3 {v15.4h, v16.4h, v17.4h}, [x15]
ld3 {v31.2s, v0.2s, v1.2s}, [sp]
// CHECK: ld3 {v0.16b, v1.16b, v2.16b}, [x0] // encoding: [0x00,0x40,0x40,0x4c]
// CHECK: ld3 {v15.8h, v16.8h, v17.8h}, [x15] // encoding: [0xef,0x45,0x40,0x4c]
// CHECK: ld3 {v31.4s, v0.4s, v1.4s}, [sp] // encoding: [0xff,0x4b,0x40,0x4c]
// CHECK: ld3 {v0.2d, v1.2d, v2.2d}, [x0] // encoding: [0x00,0x4c,0x40,0x4c]
// CHECK: ld3 {v0.8b, v1.8b, v2.8b}, [x0] // encoding: [0x00,0x40,0x40,0x0c]
// CHECK: ld3 {v15.4h, v16.4h, v17.4h}, [x15] // encoding: [0xef,0x45,0x40,0x0c]
// CHECK: ld3 {v31.2s, v0.2s, v1.2s}, [sp] // encoding: [0xff,0x4b,0x40,0x0c]
ld3 {v0.16b-v2.16b}, [x0]
ld3 {v15.8h-v17.8h}, [x15]
ld3 {v31.4s-v1.4s}, [sp]
ld3 {v0.2d-v2.2d}, [x0]
ld3 {v0.8b-v2.8b}, [x0]
ld3 {v15.4h-v17.4h}, [x15]
ld3 {v31.2s-v1.2s}, [sp]
// CHECK: ld3 {v0.16b, v1.16b, v2.16b}, [x0] // encoding: [0x00,0x40,0x40,0x4c]
// CHECK: ld3 {v15.8h, v16.8h, v17.8h}, [x15] // encoding: [0xef,0x45,0x40,0x4c]
// CHECK: ld3 {v31.4s, v0.4s, v1.4s}, [sp] // encoding: [0xff,0x4b,0x40,0x4c]
// CHECK: ld3 {v0.2d, v1.2d, v2.2d}, [x0] // encoding: [0x00,0x4c,0x40,0x4c]
// CHECK: ld3 {v0.8b, v1.8b, v2.8b}, [x0] // encoding: [0x00,0x40,0x40,0x0c]
// CHECK: ld3 {v15.4h, v16.4h, v17.4h}, [x15] // encoding: [0xef,0x45,0x40,0x0c]
// CHECK: ld3 {v31.2s, v0.2s, v1.2s}, [sp] // encoding: [0xff,0x4b,0x40,0x0c]
//------------------------------------------------------------------------------
// Load multiple 4-element structures to four consecutive registers
//------------------------------------------------------------------------------
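// ld4 uses opcode 0b0000 with L set; as throughout this file, Q (bit 30)
// distinguishes the 128-bit arrangements from the 64-bit ones.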
ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0]
ld4 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15]
ld4 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp]
ld4 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0]
ld4 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0]
ld4 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15]
ld4 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp]
// CHECK: ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0] // encoding: [0x00,0x00,0x40,0x4c]
// CHECK: ld4 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15] // encoding: [0xef,0x05,0x40,0x4c]
// CHECK: ld4 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp] // encoding: [0xff,0x0b,0x40,0x4c]
// CHECK: ld4 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0] // encoding: [0x00,0x0c,0x40,0x4c]
// CHECK: ld4 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0] // encoding: [0x00,0x00,0x40,0x0c]
// CHECK: ld4 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15] // encoding: [0xef,0x05,0x40,0x0c]
// CHECK: ld4 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp] // encoding: [0xff,0x0b,0x40,0x0c]
ld4 {v0.16b-v3.16b}, [x0]
ld4 {v15.8h-v18.8h}, [x15]
ld4 {v31.4s-v2.4s}, [sp]
ld4 {v0.2d-v3.2d}, [x0]
ld4 {v0.8b-v3.8b}, [x0]
ld4 {v15.4h-v18.4h}, [x15]
ld4 {v31.2s-v2.2s}, [sp]
// CHECK: ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0] // encoding: [0x00,0x00,0x40,0x4c]
// CHECK: ld4 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15] // encoding: [0xef,0x05,0x40,0x4c]
// CHECK: ld4 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp] // encoding: [0xff,0x0b,0x40,0x4c]
// CHECK: ld4 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0] // encoding: [0x00,0x0c,0x40,0x4c]
// CHECK: ld4 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0] // encoding: [0x00,0x00,0x40,0x0c]
// CHECK: ld4 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15] // encoding: [0xef,0x05,0x40,0x0c]
// CHECK: ld4 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp] // encoding: [0xff,0x0b,0x40,0x0c]