ARM assembly parsing and encoding for VLD2 with writeback.

Refactor the instructions into fixed writeback and register-stride
writeback variants to simplify the offset operand (no more optional
register operand using reg0). This is a simpler representation and allows
the assembly parser to more easily handle these instructions.

Add tests for the instruction variants now supported.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@146278 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Jim Grosbach 2011-12-09 21:28:25 +00:00
parent 2fac1d5d61
commit a4e3c7fc4b
6 changed files with 167 additions and 67 deletions

View File

@ -2411,12 +2411,18 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::VLD2q8:
case ARM::VLD2q16:
case ARM::VLD2q32:
case ARM::VLD2d8_UPD:
case ARM::VLD2d16_UPD:
case ARM::VLD2d32_UPD:
case ARM::VLD2q8_UPD:
case ARM::VLD2q16_UPD:
case ARM::VLD2q32_UPD:
case ARM::VLD2d8wb_fixed:
case ARM::VLD2d16wb_fixed:
case ARM::VLD2d32wb_fixed:
case ARM::VLD2q8wb_fixed:
case ARM::VLD2q16wb_fixed:
case ARM::VLD2q32wb_fixed:
case ARM::VLD2d8wb_register:
case ARM::VLD2d16wb_register:
case ARM::VLD2d32wb_register:
case ARM::VLD2q8wb_register:
case ARM::VLD2q16wb_register:
case ARM::VLD2q32wb_register:
case ARM::VLD3d8:
case ARM::VLD3d16:
case ARM::VLD3d32:
@ -2581,12 +2587,18 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::VLD2q8Pseudo:
case ARM::VLD2q16Pseudo:
case ARM::VLD2q32Pseudo:
case ARM::VLD2d8Pseudo_UPD:
case ARM::VLD2d16Pseudo_UPD:
case ARM::VLD2d32Pseudo_UPD:
case ARM::VLD2q8Pseudo_UPD:
case ARM::VLD2q16Pseudo_UPD:
case ARM::VLD2q32Pseudo_UPD:
case ARM::VLD2d8PseudoWB_fixed:
case ARM::VLD2d16PseudoWB_fixed:
case ARM::VLD2d32PseudoWB_fixed:
case ARM::VLD2q8PseudoWB_fixed:
case ARM::VLD2q16PseudoWB_fixed:
case ARM::VLD2q32PseudoWB_fixed:
case ARM::VLD2d8PseudoWB_register:
case ARM::VLD2d16PseudoWB_register:
case ARM::VLD2d32PseudoWB_register:
case ARM::VLD2q8PseudoWB_register:
case ARM::VLD2q16PseudoWB_register:
case ARM::VLD2q32PseudoWB_register:
case ARM::VLD3d8Pseudo:
case ARM::VLD3d16Pseudo:
case ARM::VLD3d32Pseudo:

View File

@ -180,18 +180,24 @@ static const NEONLdStTableEntry NEONLdStTable[] = {
{ ARM::VLD2LNq32Pseudo_UPD, ARM::VLD2LNq32_UPD, true, true, true, EvenDblSpc, 2, 2 ,true},
{ ARM::VLD2d16Pseudo, ARM::VLD2d16, true, false, false, SingleSpc, 2, 4 ,false},
{ ARM::VLD2d16Pseudo_UPD, ARM::VLD2d16_UPD, true, true, true, SingleSpc, 2, 4 ,false},
{ ARM::VLD2d16PseudoWB_fixed, ARM::VLD2d16wb_fixed, true, true, false, SingleSpc, 2, 4 ,false},
{ ARM::VLD2d16PseudoWB_register, ARM::VLD2d16wb_register, true, true, true, SingleSpc, 2, 4 ,false},
{ ARM::VLD2d32Pseudo, ARM::VLD2d32, true, false, false, SingleSpc, 2, 2 ,false},
{ ARM::VLD2d32Pseudo_UPD, ARM::VLD2d32_UPD, true, true, true, SingleSpc, 2, 2 ,false},
{ ARM::VLD2d32PseudoWB_fixed, ARM::VLD2d32wb_fixed, true, true, false, SingleSpc, 2, 2 ,false},
{ ARM::VLD2d32PseudoWB_register, ARM::VLD2d32wb_register, true, true, true, SingleSpc, 2, 2 ,false},
{ ARM::VLD2d8Pseudo, ARM::VLD2d8, true, false, false, SingleSpc, 2, 8 ,false},
{ ARM::VLD2d8Pseudo_UPD, ARM::VLD2d8_UPD, true, true, true, SingleSpc, 2, 8 ,false},
{ ARM::VLD2d8PseudoWB_fixed, ARM::VLD2d8wb_fixed, true, true, false, SingleSpc, 2, 8 ,false},
{ ARM::VLD2d8PseudoWB_register, ARM::VLD2d8wb_register, true, true, true, SingleSpc, 2, 8 ,false},
{ ARM::VLD2q16Pseudo, ARM::VLD2q16, true, false, false, SingleSpc, 4, 4 ,false},
{ ARM::VLD2q16Pseudo_UPD, ARM::VLD2q16_UPD, true, true, true, SingleSpc, 4, 4 ,false},
{ ARM::VLD2q16PseudoWB_fixed, ARM::VLD2q16wb_fixed, true, true, false, SingleSpc, 4, 4 ,false},
{ ARM::VLD2q16PseudoWB_register, ARM::VLD2q16wb_register, true, true, true, SingleSpc, 4, 4 ,false},
{ ARM::VLD2q32Pseudo, ARM::VLD2q32, true, false, false, SingleSpc, 4, 2 ,false},
{ ARM::VLD2q32Pseudo_UPD, ARM::VLD2q32_UPD, true, true, true, SingleSpc, 4, 2 ,false},
{ ARM::VLD2q32PseudoWB_fixed, ARM::VLD2q32wb_fixed, true, true, false, SingleSpc, 4, 2 ,false},
{ ARM::VLD2q32PseudoWB_register, ARM::VLD2q32wb_register, true, true, true, SingleSpc, 4, 2 ,false},
{ ARM::VLD2q8Pseudo, ARM::VLD2q8, true, false, false, SingleSpc, 4, 8 ,false},
{ ARM::VLD2q8Pseudo_UPD, ARM::VLD2q8_UPD, true, true, true, SingleSpc, 4, 8 ,false},
{ ARM::VLD2q8PseudoWB_fixed, ARM::VLD2q8wb_fixed, true, true, false, SingleSpc, 4, 8 ,false},
{ ARM::VLD2q8PseudoWB_register, ARM::VLD2q8wb_register, true, true, true, SingleSpc, 4, 8 ,false},
{ ARM::VLD3DUPd16Pseudo, ARM::VLD3DUPd16, true, false, false, SingleSpc, 3, 4,true},
{ ARM::VLD3DUPd16Pseudo_UPD, ARM::VLD3DUPd16_UPD, true, true, true, SingleSpc, 3, 4,true},
@ -1095,12 +1101,18 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::VLD2q8Pseudo:
case ARM::VLD2q16Pseudo:
case ARM::VLD2q32Pseudo:
case ARM::VLD2d8Pseudo_UPD:
case ARM::VLD2d16Pseudo_UPD:
case ARM::VLD2d32Pseudo_UPD:
case ARM::VLD2q8Pseudo_UPD:
case ARM::VLD2q16Pseudo_UPD:
case ARM::VLD2q32Pseudo_UPD:
case ARM::VLD2d8PseudoWB_fixed:
case ARM::VLD2d16PseudoWB_fixed:
case ARM::VLD2d32PseudoWB_fixed:
case ARM::VLD2q8PseudoWB_fixed:
case ARM::VLD2q16PseudoWB_fixed:
case ARM::VLD2q32PseudoWB_fixed:
case ARM::VLD2d8PseudoWB_register:
case ARM::VLD2d16PseudoWB_register:
case ARM::VLD2d32PseudoWB_register:
case ARM::VLD2q8PseudoWB_register:
case ARM::VLD2q16PseudoWB_register:
case ARM::VLD2q32PseudoWB_register:
case ARM::VLD3d8Pseudo:
case ARM::VLD3d16Pseudo:
case ARM::VLD3d32Pseudo:

View File

@ -1581,6 +1581,14 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
case ARM::VST1q64PseudoWB_fixed: return ARM::VST1q64PseudoWB_register;
case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
case ARM::VLD2d8PseudoWB_fixed: return ARM::VLD2d8PseudoWB_register;
case ARM::VLD2d16PseudoWB_fixed: return ARM::VLD2d16PseudoWB_register;
case ARM::VLD2d32PseudoWB_fixed: return ARM::VLD2d32PseudoWB_register;
case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
}
return Opc; // If not one we handle, return it unchanged.
}
@ -1648,13 +1656,13 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
Ops.push_back(Align);
if (isUpdating) {
SDValue Inc = N->getOperand(AddrOpIdx + 1);
// FIXME: VLD1 fixed increment doesn't need Reg0. Remove the reg0
// FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0
// case entirely when the rest are updated to that form, too.
if (NumVecs == 1 && !isa<ConstantSDNode>(Inc.getNode()))
if ((NumVecs == 1 || NumVecs == 2) && !isa<ConstantSDNode>(Inc.getNode()))
Opc = getVLDSTRegisterUpdateOpcode(Opc);
// We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so
// We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
// check for that explicitly too. Horribly hacky, but temporary.
if ((NumVecs != 1 && Opc != ARM::VLD1q64PseudoWB_fixed) ||
if ((NumVecs != 1 && NumVecs != 2 && Opc != ARM::VLD1q64PseudoWB_fixed) ||
!isa<ConstantSDNode>(Inc.getNode()))
Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
}
@ -2812,10 +2820,13 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::VLD2_UPD: {
unsigned DOpcodes[] = { ARM::VLD2d8Pseudo_UPD, ARM::VLD2d16Pseudo_UPD,
ARM::VLD2d32Pseudo_UPD, ARM::VLD1q64PseudoWB_fixed};
unsigned QOpcodes[] = { ARM::VLD2q8Pseudo_UPD, ARM::VLD2q16Pseudo_UPD,
ARM::VLD2q32Pseudo_UPD };
unsigned DOpcodes[] = { ARM::VLD2d8PseudoWB_fixed,
ARM::VLD2d16PseudoWB_fixed,
ARM::VLD2d32PseudoWB_fixed,
ARM::VLD1q64PseudoWB_fixed};
unsigned QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
ARM::VLD2q16PseudoWB_fixed,
ARM::VLD2q32PseudoWB_fixed };
return SelectVLD(N, true, 2, DOpcodes, QOpcodes, 0);
}

View File

@ -308,12 +308,23 @@ class VLDQWBregisterPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
(ins addrmode6:$addr, rGPR:$offset), itin,
"$addr.addr = $wb">;
class VLDQQPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQQWBPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset), itin,
"$addr.addr = $wb">;
class VLDQQWBfixedPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
(ins addrmode6:$addr), itin,
"$addr.addr = $wb">;
class VLDQQWBregisterPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
(ins addrmode6:$addr, rGPR:$offset), itin,
"$addr.addr = $wb">;
class VLDQQQQPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin,
"$src = $dst">;
@ -525,39 +536,56 @@ def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>;
def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>;
// ...with address register writeback:
class VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
InstrItinClass itin>
: NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
(ins addrmode6:$Rn, am6offset:$Rm), itin,
"vld2", Dt, "$Vd, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDInstruction";
multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
RegisterOperand VdTy, InstrItinClass itin> {
def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
(ins addrmode6:$Rn), itin,
"vld2", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDInstruction";
let AsmMatchConverter = "cvtVLDwbFixed";
}
def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
(ins addrmode6:$Rn, rGPR:$Rm), itin,
"vld2", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDInstruction";
let AsmMatchConverter = "cvtVLDwbRegister";
}
}
def VLD2d8_UPD : VLD2WB<0b1000, {0,0,?,?}, "8", VecListTwoD, IIC_VLD2u>;
def VLD2d16_UPD : VLD2WB<0b1000, {0,1,?,?}, "16", VecListTwoD, IIC_VLD2u>;
def VLD2d32_UPD : VLD2WB<0b1000, {1,0,?,?}, "32", VecListTwoD, IIC_VLD2u>;
defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListTwoD, IIC_VLD2u>;
defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListTwoD, IIC_VLD2u>;
defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListTwoD, IIC_VLD2u>;
def VLD2q8_UPD : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u>;
def VLD2q16_UPD : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u>;
def VLD2q32_UPD : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u>;
defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u>;
defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u>;
defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u>;
def VLD2d8Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
def VLD2d16Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
def VLD2d32Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
def VLD2d8PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD2u>;
def VLD2d16PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD2u>;
def VLD2d32PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD2u>;
def VLD2d8PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2u>;
def VLD2d16PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2u>;
def VLD2d32PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2u>;
def VLD2q8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
def VLD2q16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
def VLD2q32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
def VLD2q8PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
// ...with double-spaced registers
def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VLD2>;
def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VLD2>;
def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VLD2>;
def VLD2b8_UPD : VLD2WB<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VLD2u>;
def VLD2b16_UPD : VLD2WB<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VLD2u>;
def VLD2b32_UPD : VLD2WB<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VLD2u>;
def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VLD2>;
def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VLD2>;
def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VLD2>;
defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VLD2u>;
defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VLD2u>;
defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VLD2u>;
// VLD3 : Vector Load (multiple 3-element structures)
class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>

View File

@ -2085,15 +2085,24 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn,
case ARM::VLD1d32Qwb_register:
case ARM::VLD1d64Qwb_fixed:
case ARM::VLD1d64Qwb_register:
case ARM::VLD2d8_UPD:
case ARM::VLD2d16_UPD:
case ARM::VLD2d32_UPD:
case ARM::VLD2q8_UPD:
case ARM::VLD2q16_UPD:
case ARM::VLD2q32_UPD:
case ARM::VLD2b8_UPD:
case ARM::VLD2b16_UPD:
case ARM::VLD2b32_UPD:
case ARM::VLD2d8wb_fixed:
case ARM::VLD2d16wb_fixed:
case ARM::VLD2d32wb_fixed:
case ARM::VLD2q8wb_fixed:
case ARM::VLD2q16wb_fixed:
case ARM::VLD2q32wb_fixed:
case ARM::VLD2d8wb_register:
case ARM::VLD2d16wb_register:
case ARM::VLD2d32wb_register:
case ARM::VLD2q8wb_register:
case ARM::VLD2q16wb_register:
case ARM::VLD2q32wb_register:
case ARM::VLD2b8wb_fixed:
case ARM::VLD2b16wb_fixed:
case ARM::VLD2b32wb_fixed:
case ARM::VLD2b8wb_register:
case ARM::VLD2b16wb_register:
case ARM::VLD2b32wb_register:
case ARM::VLD3d8_UPD:
case ARM::VLD3d16_UPD:
case ARM::VLD3d32_UPD:

View File

@ -118,6 +118,20 @@
vld2.16 {d16, d17, d18, d19}, [r0, :128]
vld2.32 {d16, d17, d18, d19}, [r0, :256]
vld2.8 {d19, d20}, [r0, :64]!
vld2.16 {d16, d17}, [r0, :128]!
vld2.32 {q10}, [r0]!
vld2.8 {d4-d7}, [r0, :64]!
vld2.16 {d1, d2, d3, d4}, [r0, :128]!
vld2.32 {q7, q8}, [r0, :256]!
vld2.8 {d19, d20}, [r0, :64], r6
vld2.16 {d16, d17}, [r0, :128], r6
vld2.32 {q10}, [r0], r6
vld2.8 {d4-d7}, [r0, :64], r6
vld2.16 {d1, d2, d3, d4}, [r0, :128], r6
vld2.32 {q7, q8}, [r0, :256], r6
@ CHECK: vld2.8 {d16, d17}, [r0, :64] @ encoding: [0x1f,0x08,0x60,0xf4]
@ CHECK: vld2.16 {d16, d17}, [r0, :128] @ encoding: [0x6f,0x08,0x60,0xf4]
@ CHECK: vld2.32 {d16, d17}, [r0] @ encoding: [0x8f,0x08,0x60,0xf4]
@ -125,6 +139,20 @@
@ CHECK: vld2.16 {d16, d17, d18, d19}, [r0, :128] @ encoding: [0x6f,0x03,0x60,0xf4]
@ CHECK: vld2.32 {d16, d17, d18, d19}, [r0, :256] @ encoding: [0xbf,0x03,0x60,0xf4]
@ CHECK: vld2.8 {d19, d20}, [r0, :64]! @ encoding: [0x1d,0x38,0x60,0xf4]
@ CHECK: vld2.16 {d16, d17}, [r0, :128]! @ encoding: [0x6d,0x08,0x60,0xf4]
@ CHECK: vld2.32 {d20, d21}, [r0]! @ encoding: [0x8d,0x48,0x60,0xf4]
@ CHECK: vld2.8 {d4, d5, d6, d7}, [r0, :64]! @ encoding: [0x1d,0x43,0x20,0xf4]
@ CHECK: vld2.16 {d1, d2, d3, d4}, [r0, :128]! @ encoding: [0x6d,0x13,0x20,0xf4]
@ CHECK: vld2.32 {d14, d15, d16, d17}, [r0, :256]! @ encoding: [0xbd,0xe3,0x20,0xf4]
@ CHECK: vld2.8 {d19, d20}, [r0, :64], r6 @ encoding: [0x16,0x38,0x60,0xf4]
@ CHECK: vld2.16 {d16, d17}, [r0, :128], r6 @ encoding: [0x66,0x08,0x60,0xf4]
@ CHECK: vld2.32 {d20, d21}, [r0], r6 @ encoding: [0x86,0x48,0x60,0xf4]
@ CHECK: vld2.8 {d4, d5, d6, d7}, [r0, :64], r6 @ encoding: [0x16,0x43,0x20,0xf4]
@ CHECK: vld2.16 {d1, d2, d3, d4}, [r0, :128], r6 @ encoding: [0x66,0x13,0x20,0xf4]
@ CHECK: vld2.32 {d14, d15, d16, d17}, [r0, :256], r6 @ encoding: [0xb6,0xe3,0x20,0xf4]
@ vld3.8 {d16, d17, d18}, [r0, :64]
@ vld3.16 {d16, d17, d18}, [r0]