ARM: split four-register VLD1 with writeback into fixed/register forms.

The single VLD1D4WB class (one "$Vd, $Rn$Rm" form taking an am6offset
operand) becomes a multiclass producing two definitions per element size:

  * <name>wb_fixed    - "$Vd, $Rn!" syntax; Rm is hard-wired to 0b1101.
  * <name>wb_register - "$Vd, $Rn, $Rm" syntax; Rm is an rGPR operand.

The opcode lists in ARMBaseInstrInfo::getOperandLatency and
DecodeVLDInstruction are updated to the new opcode names, and MC encoding
tests are added for both forms at all four element sizes.

Review notes:
  * Both new definitions keep the IIC_VLD1x4u itinerary that the removed
    class used. Switching them to IIC_VLD1x2u would change the scheduling
    itinerary of a four-register load as a side effect of an assembler-only
    change.
  * This patch text was reconstructed from a copy whose line breaks,
    indentation and some "<...>" template arguments had been lost. The
    VLDQQPseudo argument is presumed to be IIC_VLD1x4 and the -/+ pair on
    VLD1d64QPseudo presumably differs only in whitespace - TODO confirm
    both against the tree before applying.

diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 23fae3e0806..7a7267a719b 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -2440,7 +2440,8 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
   case ARM::VLD4d8_UPD:
   case ARM::VLD4d16_UPD:
   case ARM::VLD4d32_UPD:
-  case ARM::VLD1d64Q_UPD:
+  case ARM::VLD1d64Qwb_fixed:
+  case ARM::VLD1d64Qwb_register:
   case ARM::VLD4q8_UPD:
   case ARM::VLD4q16_UPD:
   case ARM::VLD4q32_UPD:
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 9d0350b322d..75418aa9758 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -424,13 +424,24 @@ class VLD1D4<bits<4> op7_4, string Dt>
   let Inst{5-4} = Rn{5-4};
   let DecoderMethod = "DecodeVLDInstruction";
 }
-class VLD1D4WB<bits<4> op7_4, string Dt>
-  : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb),
-          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x4u, "vld1", Dt,
-          "$Vd, $Rn$Rm", "$Rn.addr = $wb",
-          []> {
-  let Inst{5-4} = Rn{5-4};
-  let DecoderMethod = "DecodeVLDInstruction";
+multiclass VLD1D4WB<bits<4> op7_4, string Dt> {
+  def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb),
+                     (ins addrmode6:$Rn), IIC_VLD1x4u,
+                     "vld1", Dt, "$Vd, $Rn!",
+                     "$Rn.addr = $wb", []> {
+    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+    let Inst{5-4} = Rn{5-4};
+    let DecoderMethod = "DecodeVLDInstruction";
+    let AsmMatchConverter = "cvtVLDwbFixed";
+  }
+  def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb),
+                        (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x4u,
+                        "vld1", Dt, "$Vd, $Rn, $Rm",
+                        "$Rn.addr = $wb", []> {
+    let Inst{5-4} = Rn{5-4};
+    let DecoderMethod = "DecodeVLDInstruction";
+    let AsmMatchConverter = "cvtVLDwbRegister";
+  }
 }
 
 def VLD1d8Q : VLD1D4<{0,0,?,?}, "8">;
@@ -438,12 +449,12 @@ def VLD1d16Q : VLD1D4<{0,1,?,?}, "16">;
 def VLD1d32Q : VLD1D4<{1,0,?,?}, "32">;
 def VLD1d64Q : VLD1D4<{1,1,?,?}, "64">;
 
-def VLD1d8Q_UPD : VLD1D4WB<{0,0,?,?}, "8">;
-def VLD1d16Q_UPD : VLD1D4WB<{0,1,?,?}, "16">;
-def VLD1d32Q_UPD : VLD1D4WB<{1,0,?,?}, "32">;
-def VLD1d64Q_UPD : VLD1D4WB<{1,1,?,?}, "64">;
+defm VLD1d8Qwb : VLD1D4WB<{0,0,?,?}, "8">;
+defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16">;
+defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32">;
+defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64">;
 
-def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>;
+def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>;
 
 // VLD2 : Vector Load (multiple 2-element structures)
 class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy>
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index ddc5c99d364..5174134c465 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -2078,10 +2078,14 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn,
     case ARM::VLD1d32Twb_register:
     case ARM::VLD1d64Twb_fixed:
     case ARM::VLD1d64Twb_register:
-    case ARM::VLD1d8Q_UPD:
-    case ARM::VLD1d16Q_UPD:
-    case ARM::VLD1d32Q_UPD:
-    case ARM::VLD1d64Q_UPD:
+    case ARM::VLD1d8Qwb_fixed:
+    case ARM::VLD1d8Qwb_register:
+    case ARM::VLD1d16Qwb_fixed:
+    case ARM::VLD1d16Qwb_register:
+    case ARM::VLD1d32Qwb_fixed:
+    case ARM::VLD1d32Qwb_register:
+    case ARM::VLD1d64Qwb_fixed:
+    case ARM::VLD1d64Qwb_register:
     case ARM::VLD2d8_UPD:
     case ARM::VLD2d16_UPD:
     case ARM::VLD2d32_UPD:
diff --git a/test/MC/ARM/neon-vld-encoding.s b/test/MC/ARM/neon-vld-encoding.s
index 08eb88403ad..1a77966bca0 100644
--- a/test/MC/ARM/neon-vld-encoding.s
+++ b/test/MC/ARM/neon-vld-encoding.s
@@ -45,6 +45,16 @@
         vld1.32 {d5, d6, d7}, [r3], r6
         vld1.64 {d6, d7, d8}, [r3, :64], r6
 
+        vld1.8 {d1, d2, d3, d4}, [r3]!
+        vld1.16 {d4, d5, d6, d7}, [r3, :64]!
+        vld1.32 {d5, d6, d7, d8}, [r3]!
+        vld1.64 {d6, d7, d8, d9}, [r3, :64]!
+
+        vld1.8 {d1, d2, d3, d4}, [r3], r8
+        vld1.16 {d4, d5, d6, d7}, [r3, :64], r8
+        vld1.32 {d5, d6, d7, d8}, [r3], r8
+        vld1.64 {d6, d7, d8, d9}, [r3, :64], r8
+
 @ CHECK: vld1.8 {d16}, [r0, :64] @ encoding: [0x1f,0x07,0x60,0xf4]
 @ CHECK: vld1.16 {d16}, [r0] @ encoding: [0x4f,0x07,0x60,0xf4]
 @ CHECK: vld1.32 {d16}, [r0] @ encoding: [0x8f,0x07,0x60,0xf4]
@@ -90,6 +100,16 @@
 @ CHECK: vld1.32 {d5, d6, d7}, [r3], r6 @ encoding: [0x86,0x56,0x23,0xf4]
 @ CHECK: vld1.64 {d6, d7, d8}, [r3, :64], r6 @ encoding: [0xd6,0x66,0x23,0xf4]
 
+@ CHECK: vld1.8 {d1, d2, d3, d4}, [r3]! @ encoding: [0x0d,0x12,0x23,0xf4]
+@ CHECK: vld1.16 {d4, d5, d6, d7}, [r3, :64]! @ encoding: [0x5d,0x42,0x23,0xf4]
+@ CHECK: vld1.32 {d5, d6, d7, d8}, [r3]! @ encoding: [0x8d,0x52,0x23,0xf4]
+@ CHECK: vld1.64 {d6, d7, d8, d9}, [r3, :64]! @ encoding: [0xdd,0x62,0x23,0xf4]
+
+@ CHECK: vld1.8 {d1, d2, d3, d4}, [r3], r8 @ encoding: [0x08,0x12,0x23,0xf4]
+@ CHECK: vld1.16 {d4, d5, d6, d7}, [r3, :64], r8 @ encoding: [0x58,0x42,0x23,0xf4]
+@ CHECK: vld1.32 {d5, d6, d7, d8}, [r3], r8 @ encoding: [0x88,0x52,0x23,0xf4]
+@ CHECK: vld1.64 {d6, d7, d8, d9}, [r3, :64], r8 @ encoding: [0xd8,0x62,0x23,0xf4]
+
 
         vld2.8 {d16, d17}, [r0, :64]
         vld2.16 {d16, d17}, [r0, :128]