Switch the N3VLInt3 NEON instruction class to the $Vd/$Vn/$Vm operand names
and add encoding tests for the vqdmlal.s16 / vqdmlal.s32 intrinsics.

NOTE(review): the first hunk below appears to have lost text during
extraction -- the angle-bracketed template arguments and the removed ('-')
lines are missing, so its body no longer matches the "-1495,10 +1495,10"
header. Restore that hunk from the original commit before applying; the
line breaks shown in its context lines are a best guess.

diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 80704829655..691ef037bfb 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -1495,10 +1495,10 @@ class N3VLInt3 op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType TyQ, ValueType TyD, Intrinsic IntOp>
   : N3V;
+        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+        [(set QPR:$Vd,
+          (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$Vn), (TyD DPR:$Vm))))]>;
 class N3VLInt3SL op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
diff --git a/test/MC/ARM/neon-mul-accum-encoding.ll b/test/MC/ARM/neon-mul-accum-encoding.ll
index 891e76fd04e..ff794e29ca1 100644
--- a/test/MC/ARM/neon-mul-accum-encoding.ll
+++ b/test/MC/ARM/neon-mul-accum-encoding.ll
@@ -165,3 +165,27 @@ define <2 x i64> @vmlalu_2xi32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) noun
   %tmp7 = add <2 x i64> %tmp1, %tmp6
   ret <2 x i64> %tmp7
 }
+
+declare <4 x i32> @llvm.arm.neon.vqdmlal.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vqdmlal.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
+
+; CHECK: vqdmlal_4xi16
+define <4 x i32> @vqdmlal_4xi16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+  %tmp1 = load <4 x i32>* %A
+  %tmp2 = load <4 x i16>* %B
+  %tmp3 = load <4 x i16>* %C
+; CHECK: vqdmlal.s16 q8, d19, d18 @ encoding: [0xa2,0x09,0xd3,0xf2]
+  %tmp4 = call <4 x i32> @llvm.arm.neon.vqdmlal.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+  ret <4 x i32> %tmp4
+}
+
+; CHECK: vqdmlal_2xi32
+define <2 x i64> @vqdmlal_2xi32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+  %tmp1 = load <2 x i64>* %A
+  %tmp2 = load <2 x i32>* %B
+  %tmp3 = load <2 x i32>* %C
+; CHECK: vqdmlal.s32 q8, d19, d18 @ encoding: [0xa2,0x09,0xe3,0xf2]
+  %tmp4 = call <2 x i64> @llvm.arm.neon.vqdmlal.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+  ret <2 x i64> %tmp4
+}
+