diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index a0d04c03fc2..27f2b67fcd9 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -1996,7 +1996,7 @@ class NEONFPPat : Pat { // VFP/NEON Instruction aliases for type suffices. class VFPDataTypeInstAlias : - InstAlias; + InstAlias, Requires<[HasVFP2]>; multiclass VFPDT8ReqInstAlias { def I8 : VFPDataTypeInstAlias; def S8 : VFPDataTypeInstAlias; diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 3f27b0c2f60..a7a74de56d6 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -3669,15 +3669,6 @@ def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>; def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32, v2f32, fmul>; -// Two-operand aliases. -def : NEONInstAlias<"vmul${p}.f32 $Ddn $Dm$lane", - (VMULslfd DPR:$Ddn, DPR:$Ddn, DPR_VFP2:$Dm, - VectorIndex32:$lane, pred:$p)>; -def : NEONInstAlias<"vmul${p}.f32 $Qdn $Dm$lane", - (VMULslfq QPR:$Qdn, QPR:$Qdn, DPR_VFP2:$Dm, - VectorIndex32:$lane, pred:$p)>; - - def : Pat<(v8i16 (mul (v8i16 QPR:$src1), (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))), (v8i16 (VMULslv8i16 (v8i16 QPR:$src1), @@ -5620,6 +5611,79 @@ defm VEXTq : VFPDT32ReqInstAlias<"vext${p}", "$Vd, $Vn, $Vm, $index", defm VEXTq : VFPDT64ReqInstAlias<"vext${p}", "$Vd, $Vn, $Vm, $index", (VEXTq64 QPR:$Vd, QPR:$Vn, QPR:$Vm, imm0_1:$index, pred:$p)>; +// VMUL instructions data type suffix aliases for more-specific types. +def : NEONInstAlias<"vmul${p}.s16 $Dd, $Dn $Dm$lane", + (VMULslv4i16 DPR:$Dd, DPR:$Dn, DPR_8:$Dm, + VectorIndex16:$lane, pred:$p)>; +def : NEONInstAlias<"vmul${p}.s16 $Qd, $Qn, $Dm$lane", + (VMULslv8i16 QPR:$Qd, QPR:$Qn, DPR_8:$Dm, + VectorIndex16:$lane, pred:$p)>; +def : NEONInstAlias<"vmul${p}.u16 $Dd, $Dn $Dm$lane", + (VMULslv4i16 DPR:$Dd, DPR:$Dn, DPR_8:$Dm, + VectorIndex16:$lane, pred:$p)>; +def : NEONInstAlias<"vmul${p}.u16 $Qd, $Qn, $Dm$lane", + (VMULslv8i16 QPR:$Qd, QPR:$Qn, DPR_8:$Dm, + VectorIndex16:$lane, pred:$p)>; + +def : NEONInstAlias<"vmul${p}.s32 $Dd, $Dn $Dm$lane", + (VMULslv2i32 DPR:$Dd, DPR:$Dn, DPR_VFP2:$Dm, + VectorIndex32:$lane, pred:$p)>; +def : NEONInstAlias<"vmul${p}.s32 $Qd, $Qn, $Dm$lane", + (VMULslv4i32 QPR:$Qd, QPR:$Qn, DPR_VFP2:$Dm, + VectorIndex32:$lane, pred:$p)>; +def : NEONInstAlias<"vmul${p}.u32 $Dd, $Dn $Dm$lane", + (VMULslv2i32 DPR:$Dd, DPR:$Dn, DPR_VFP2:$Dm, + VectorIndex32:$lane, pred:$p)>; +def : NEONInstAlias<"vmul${p}.u32 $Qd, $Qn, $Dm$lane", + (VMULslv4i32 QPR:$Qd, QPR:$Qn, DPR_VFP2:$Dm, + VectorIndex32:$lane, pred:$p)>; + +// VMUL two-operand aliases. +def : NEONInstAlias<"vmul${p}.i16 $Ddn, $Dm$lane", + (VMULslv4i16 DPR:$Ddn, DPR:$Ddn, DPR_8:$Dm, + VectorIndex16:$lane, pred:$p)>; +def : NEONInstAlias<"vmul${p}.i16 $Qdn, $Dm$lane", + (VMULslv8i16 QPR:$Qdn, QPR:$Qdn, DPR_8:$Dm, + VectorIndex16:$lane, pred:$p)>; +def : NEONInstAlias<"vmul${p}.s16 $Ddn, $Dm$lane", + (VMULslv4i16 DPR:$Ddn, DPR:$Ddn, DPR_8:$Dm, + VectorIndex16:$lane, pred:$p)>; +def : NEONInstAlias<"vmul${p}.s16 $Qdn, $Dm$lane", + (VMULslv8i16 QPR:$Qdn, QPR:$Qdn, DPR_8:$Dm, + VectorIndex16:$lane, pred:$p)>; +def : NEONInstAlias<"vmul${p}.u16 $Ddn, $Dm$lane", + (VMULslv4i16 DPR:$Ddn, DPR:$Ddn, DPR_8:$Dm, + VectorIndex16:$lane, pred:$p)>; +def : NEONInstAlias<"vmul${p}.u16 $Qdn, $Dm$lane", + (VMULslv8i16 QPR:$Qdn, QPR:$Qdn, DPR_8:$Dm, + VectorIndex16:$lane, pred:$p)>; + +def : NEONInstAlias<"vmul${p}.i32 $Ddn, $Dm$lane", + (VMULslv2i32 DPR:$Ddn, DPR:$Ddn, DPR_VFP2:$Dm, + VectorIndex32:$lane, pred:$p)>; +def : NEONInstAlias<"vmul${p}.i32 $Qdn, $Dm$lane", + (VMULslv4i32 QPR:$Qdn, QPR:$Qdn, DPR_VFP2:$Dm, + VectorIndex32:$lane, pred:$p)>; +def : NEONInstAlias<"vmul${p}.s32 $Ddn, $Dm$lane", + (VMULslv2i32 DPR:$Ddn, DPR:$Ddn, DPR_VFP2:$Dm, + VectorIndex32:$lane, pred:$p)>; +def : NEONInstAlias<"vmul${p}.s32 $Qdn, $Dm$lane", + (VMULslv4i32 QPR:$Qdn, QPR:$Qdn, DPR_VFP2:$Dm, + VectorIndex32:$lane, pred:$p)>; +def : NEONInstAlias<"vmul${p}.u32 $Ddn, $Dm$lane", + (VMULslv2i32 DPR:$Ddn, DPR:$Ddn, DPR_VFP2:$Dm, + VectorIndex32:$lane, pred:$p)>; +def : NEONInstAlias<"vmul${p}.u32 $Qdn, $Dm$lane", + (VMULslv4i32 QPR:$Qdn, QPR:$Qdn, DPR_VFP2:$Dm, + VectorIndex32:$lane, pred:$p)>; + +def : NEONInstAlias<"vmul${p}.f32 $Ddn, $Dm$lane", + (VMULslfd DPR:$Ddn, DPR:$Ddn, DPR_VFP2:$Dm, + VectorIndex32:$lane, pred:$p)>; +def : NEONInstAlias<"vmul${p}.f32 $Qdn, $Dm$lane", + (VMULslfq QPR:$Qdn, QPR:$Qdn, DPR_VFP2:$Dm, + VectorIndex32:$lane, pred:$p)>; + // VLD1 single-lane pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. defm VLD1LNdAsm : NEONDT8AsmPseudoInst<"vld1${p}", "$list, $addr", diff --git a/test/MC/ARM/neon-mul-encoding.s b/test/MC/ARM/neon-mul-encoding.s index 1c7caf2a550..990187e7821 100644 --- a/test/MC/ARM/neon-mul-encoding.s +++ b/test/MC/ARM/neon-mul-encoding.s @@ -72,3 +72,68 @@ @ CHECK: vqdmull.s16 q8, d16, d17 @ encoding: [0xa1,0x0d,0xd0,0xf2] @ CHECK: vqdmull.s32 q8, d16, d17 @ encoding: [0xa1,0x0d,0xe0,0xf2] + + + vmul.i16 d0, d4[2] + vmul.s16 d1, d7[3] + vmul.u16 d2, d1[1] + vmul.i32 d3, d2[0] + vmul.s32 d4, d3[1] + vmul.u32 d5, d4[0] + vmul.f32 d6, d5[1] + + vmul.i16 q0, d4[2] + vmul.s16 q1, d7[3] + vmul.u16 q2, d1[1] + vmul.i32 q3, d2[0] + vmul.s32 q4, d3[1] + vmul.u32 q5, d4[0] + vmul.f32 q6, d5[1] + + vmul.i16 d9, d0, d4[2] + vmul.s16 d8, d1, d7[3] + vmul.u16 d7, d2, d1[1] + vmul.i32 d6, d3, d2[0] + vmul.s32 d5, d4, d3[1] + vmul.u32 d4, d5, d4[0] + vmul.f32 d3, d6, d5[1] + + vmul.i16 q9, q0, d4[2] + vmul.s16 q8, q1, d7[3] + vmul.u16 q7, q2, d1[1] + vmul.i32 q6, q3, d2[0] + vmul.s32 q5, q4, d3[1] + vmul.u32 q4, q5, d4[0] + vmul.f32 q3, q6, d5[1] + +@ CHECK: vmul.i16 d0, d0, d4[2] @ encoding: [0x64,0x08,0x90,0xf2] +@ CHECK: vmul.i16 d1, d1, d7[3] @ encoding: [0x6f,0x18,0x91,0xf2] +@ CHECK: vmul.i16 d2, d2, d1[1] @ encoding: [0x49,0x28,0x92,0xf2] +@ CHECK: vmul.i32 d3, d3, d2[0] @ encoding: [0x42,0x38,0xa3,0xf2] +@ CHECK: vmul.i32 d4, d4, d3[1] @ encoding: [0x63,0x48,0xa4,0xf2] +@ CHECK: vmul.i32 d5, d5, d4[0] @ encoding: [0x44,0x58,0xa5,0xf2] +@ CHECK: vmul.f32 d6, d6, d5[1] @ encoding: [0x65,0x69,0xa6,0xf2] + +@ CHECK: vmul.i16 q0, q0, d4[2] @ encoding: [0x64,0x08,0x90,0xf3] +@ CHECK: vmul.i16 q1, q1, d7[3] @ encoding: [0x6f,0x28,0x92,0xf3] +@ CHECK: vmul.i16 q2, q2, d1[1] @ encoding: [0x49,0x48,0x94,0xf3] +@ CHECK: vmul.i32 q3, q3, d2[0] @ encoding: [0x42,0x68,0xa6,0xf3] +@ CHECK: vmul.i32 q4, q4, d3[1] @ encoding: [0x63,0x88,0xa8,0xf3] +@ CHECK: vmul.i32 q5, q5, d4[0] @ encoding: [0x44,0xa8,0xaa,0xf3] +@ CHECK: vmul.f32 q6, q6, d5[1] @ encoding: [0x65,0xc9,0xac,0xf3] + +@ CHECK: vmul.i16 d9, d0, d4[2] @ encoding: [0x64,0x98,0x90,0xf2] +@ CHECK: vmul.i16 d8, d1, d7[3] @ encoding: [0x6f,0x88,0x91,0xf2] +@ CHECK: vmul.i16 d7, d2, d1[1] @ encoding: [0x49,0x78,0x92,0xf2] +@ CHECK: vmul.i32 d6, d3, d2[0] @ encoding: [0x42,0x68,0xa3,0xf2] +@ CHECK: vmul.i32 d5, d4, d3[1] @ encoding: [0x63,0x58,0xa4,0xf2] +@ CHECK: vmul.i32 d4, d5, d4[0] @ encoding: [0x44,0x48,0xa5,0xf2] +@ CHECK: vmul.f32 d3, d6, d5[1] @ encoding: [0x65,0x39,0xa6,0xf2] + +@ CHECK: vmul.i16 q9, q0, d4[2] @ encoding: [0x64,0x28,0xd0,0xf3] +@ CHECK: vmul.i16 q8, q1, d7[3] @ encoding: [0x6f,0x08,0xd2,0xf3] +@ CHECK: vmul.i16 q7, q2, d1[1] @ encoding: [0x49,0xe8,0x94,0xf3] +@ CHECK: vmul.i32 q6, q3, d2[0] @ encoding: [0x42,0xc8,0xa6,0xf3] +@ CHECK: vmul.i32 q5, q4, d3[1] @ encoding: [0x63,0xa8,0xa8,0xf3] +@ CHECK: vmul.i32 q4, q5, d4[0] @ encoding: [0x44,0x88,0xaa,0xf3] +@ CHECK: vmul.f32 q3, q6, d5[1] @ encoding: [0x65,0x69,0xac,0xf3]