diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index acb7453ddcb..398369fec44 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -6085,6 +6085,56 @@ def VLD3qWB_register_Asm_32 : (ins VecListThreeQ:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; +// VST3 single-lane pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. +def VST3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr", + (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", + (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", + (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", + (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", + (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + +def VST3LNdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!", + (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", + (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", + (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", + (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", + (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST3LNdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST3LNdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST3LNqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST3LNqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + + // VST3 multiple structurepseudo-instructions. These need special handling for // the vector operands that the normal instructions don't yet model. // FIXME: Remove these when the register classes and instructions are updated. diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 01c3c6dc5aa..2fdfb21b45a 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -5254,6 +5254,53 @@ static unsigned getRealVSTOpcode(unsigned Opc, unsigned &Spacing) { Spacing = 2; return ARM::VST2LNq32; + // VST3LN + case ARM::VST3LNdWB_fixed_Asm_8: + Spacing = 1; + return ARM::VST3LNd8_UPD; + case ARM::VST3LNdWB_fixed_Asm_16: + Spacing = 1; + return ARM::VST3LNd16_UPD; + case ARM::VST3LNdWB_fixed_Asm_32: + Spacing = 1; + return ARM::VST3LNd32_UPD; + case ARM::VST3LNqWB_fixed_Asm_16: + Spacing = 1; + return ARM::VST3LNq16_UPD; + case ARM::VST3LNqWB_fixed_Asm_32: + Spacing = 2; + return ARM::VST3LNq32_UPD; + case ARM::VST3LNdWB_register_Asm_8: + Spacing = 1; + return ARM::VST3LNd8_UPD; + case ARM::VST3LNdWB_register_Asm_16: + Spacing = 1; + return ARM::VST3LNd16_UPD; + case ARM::VST3LNdWB_register_Asm_32: + Spacing = 1; + return ARM::VST3LNd32_UPD; + case ARM::VST3LNqWB_register_Asm_16: + Spacing = 2; + return ARM::VST3LNq16_UPD; + case ARM::VST3LNqWB_register_Asm_32: + Spacing = 2; + return ARM::VST3LNq32_UPD; + case ARM::VST3LNdAsm_8: + Spacing = 1; + return ARM::VST3LNd8; + case ARM::VST3LNdAsm_16: + Spacing = 1; + return ARM::VST3LNd16; + case ARM::VST3LNdAsm_32: + Spacing = 1; + return ARM::VST3LNd32; + case ARM::VST3LNqAsm_16: + Spacing = 2; + return ARM::VST3LNq16; + case ARM::VST3LNqAsm_32: + Spacing = 2; + return ARM::VST3LNq32; + // VST3 case ARM::VST3dWB_fixed_Asm_8: Spacing = 1; @@ -5560,6 +5607,33 @@ processInstruction(MCInst &Inst, Inst = TmpInst; return true; } + + case ARM::VST3LNdWB_register_Asm_8: + case ARM::VST3LNdWB_register_Asm_16: + case ARM::VST3LNdWB_register_Asm_32: + case ARM::VST3LNqWB_register_Asm_16: + case ARM::VST3LNqWB_register_Asm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(4)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(5)); // CondCode + TmpInst.addOperand(Inst.getOperand(6)); + Inst = TmpInst; + return true; + } + case ARM::VST1LNdWB_fixed_Asm_8: case ARM::VST1LNdWB_fixed_Asm_16: case ARM::VST1LNdWB_fixed_Asm_32: { @@ -5603,6 +5677,33 @@ processInstruction(MCInst &Inst, Inst = TmpInst; return true; } + + case ARM::VST3LNdWB_fixed_Asm_8: + case ARM::VST3LNdWB_fixed_Asm_16: + case ARM::VST3LNdWB_fixed_Asm_32: + case ARM::VST3LNqWB_fixed_Asm_16: + case ARM::VST3LNqWB_fixed_Asm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + case ARM::VST1LNdAsm_8: case ARM::VST1LNdAsm_16: case ARM::VST1LNdAsm_32: { @@ -5642,6 +5743,31 @@ processInstruction(MCInst &Inst, Inst = TmpInst; return true; } + + case ARM::VST3LNdAsm_8: + case ARM::VST3LNdAsm_16: + case ARM::VST3LNdAsm_32: + case ARM::VST3LNqAsm_16: + case ARM::VST3LNqAsm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + // Handle NEON VLD complex aliases. case ARM::VLD1LNdWB_register_Asm_8: case ARM::VLD1LNdWB_register_Asm_16: diff --git a/test/MC/ARM/neon-vst-encoding.s b/test/MC/ARM/neon-vst-encoding.s index 79e74f58709..fad4ae87de2 100644 --- a/test/MC/ARM/neon-vst-encoding.s +++ b/test/MC/ARM/neon-vst-encoding.s @@ -153,6 +153,41 @@ @ CHECK: vst2.32 {d5[0], d7[0]}, [r4, :64], r7 @ encoding: [0x57,0x59,0x84,0xf4] + vst3.8 {d16[1], d17[1], d18[1]}, [r1] + vst3.16 {d6[1], d7[1], d8[1]}, [r2] + vst3.32 {d1[1], d2[1], d3[1]}, [r3] + vst3.u16 {d27[1], d29[1], d31[1]}, [r4] + vst3.i32 {d6[1], d8[1], d10[1]}, [r5] + + vst3.i8 {d12[1], d13[1], d14[1]}, [r6], r1 + vst3.i16 {d11[1], d12[1], d13[1]}, [r7], r2 + vst3.u32 {d2[1], d3[1], d4[1]}, [r8], r3 + vst3.u16 {d14[1], d16[1], d18[1]}, [r9], r4 + vst3.i32 {d16[1], d18[1], d20[1]}, [r10], r5 + + vst3.p8 {d6[1], d7[1], d8[1]}, [r8]! + vst3.16 {d9[1], d10[1], d11[1]}, [r7]! + vst3.f32 {d1[1], d2[1], d3[1]}, [r6]! + vst3.p16 {d20[1], d22[1], d24[1]}, [r5]! + vst3.32 {d5[1], d7[1], d9[1]}, [r4]! + +@ CHECK: vst3.8 {d16[1], d17[1], d18[1]}, [r1] @ encoding: [0x2f,0x02,0xc1,0xf4] +@ CHECK: vst3.16 {d6[1], d7[1], d8[1]}, [r2] @ encoding: [0x4f,0x66,0x82,0xf4] +@ CHECK: vst3.32 {d1[1], d2[1], d3[1]}, [r3] @ encoding: [0x8f,0x1a,0x83,0xf4] +@ CHECK: vst3.16 {d27[1], d29[1], d31[1]}, [r4] @ encoding: [0x6f,0xb6,0xc4,0xf4] +@ CHECK: vst3.32 {d6[1], d8[1], d10[1]}, [r5] @ encoding: [0xcf,0x6a,0x85,0xf4] +@ CHECK: vst3.8 {d12[1], d13[1], d14[1]}, [r6], r1 @ encoding: [0x21,0xc2,0x86,0xf4] +@ CHECK: vst3.16 {d11[1], d12[1], d13[1]}, [r7], r2 @ encoding: [0x42,0xb6,0x87,0xf4] +@ CHECK: vst3.32 {d2[1], d3[1], d4[1]}, [r8], r3 @ encoding: [0x83,0x2a,0x88,0xf4] +@ CHECK: vst3.16 {d14[1], d16[1], d18[1]}, [r9], r4 @ encoding: [0x64,0xe6,0x89,0xf4] +@ CHECK: vst3.32 {d16[1], d18[1], d20[1]}, [r10], r5 @ encoding: [0xc5,0x0a,0xca,0xf4] +@ CHECK: vst3.8 {d6[1], d7[1], d8[1]}, [r8]! @ encoding: [0x2d,0x62,0x88,0xf4] +@ CHECK: vst3.16 {d9[1], d10[1], d11[1]}, [r7]! @ encoding: [0x4d,0x96,0x87,0xf4] +@ CHECK: vst3.32 {d1[1], d2[1], d3[1]}, [r6]! @ encoding: [0x8d,0x1a,0x86,0xf4] +@ CHECK: vst3.16 {d20[1], d21[1], d22[1]}, [r5]! @ encoding: [0x6d,0x46,0xc5,0xf4] +@ CHECK: vst3.32 {d5[1], d7[1], d9[1]}, [r4]! @ encoding: [0xcd,0x5a,0x84,0xf4] + + @ vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0, :32] @ vst4.16 {d16[1], d17[1], d18[1], d19[1]}, [r0] @ vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0, :128]