diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index 3d43edba728..7069e91ce54 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -1754,6 +1754,17 @@ class N2VImm<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
   let Inst{7} = op7;
   let Inst{6} = op6;
   let Inst{4} = op4;
+
+  // Instruction operands.
+  bits<5> Vd;
+  bits<5> Vm;
+  bits<6> SIMM;
+
+  let Inst{15-12} = Vd{3-0};
+  let Inst{22} = Vd{4};
+  let Inst{3-0} = Vm{3-0};
+  let Inst{5} = Vm{4};
+  let Inst{21-16} = SIMM{5-0};
 }
 
 // NEON 3 vector register format.
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 232dd486a05..afb2afa7d60 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -1289,6 +1289,15 @@ class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                                      (Ty (NEONvduplane (Ty DPR_8:$src2),
                                                        imm:$lane)))))]> {
   let isCommutable = 0;
 }
+class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+                Format f, InstrItinClass itin, string OpcodeStr, string Dt,
+                ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
+  : N3V<op24, op23, op21_20, op11_8, op4, 0,
+        (outs DPR:$Vd), (ins DPR:$Vm, DPR:$Vn), f, itin,
+        OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
+        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> {
+  let isCommutable = Commutable;
+}
 
 class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               Format f, InstrItinClass itin, string OpcodeStr, string Dt,
@@ -1323,6 +1332,15 @@ class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                                                        imm:$lane)))))]> {
   let isCommutable = 0;
 }
+class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+                Format f, InstrItinClass itin, string OpcodeStr, string Dt,
+                ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
+  : N3V<op24, op23, op21_20, op11_8, op4, 1,
+        (outs QPR:$Vd), (ins QPR:$Vm, QPR:$Vn), f, itin,
+        OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
+        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> {
+  let isCommutable = Commutable;
+}
 
 // Multiply-Add/Sub operations: single-, double- and quad-register.
 class N3VSMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
@@ -1936,6 +1954,27 @@ multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                         OpcodeStr, !strconcat(Dt, "32"),
                         v4i32, v4i32, IntOp, Commutable>;
 }
+multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+                       InstrItinClass itinD16, InstrItinClass itinD32,
+                       InstrItinClass itinQ16, InstrItinClass itinQ32,
+                       string OpcodeStr, string Dt,
+                       Intrinsic IntOp, bit Commutable = 0> {
+  // 64-bit vector types.
+  def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16,
+                        OpcodeStr, !strconcat(Dt, "16"),
+                        v4i16, v4i16, IntOp, Commutable>;
+  def v2i32 : N3VDIntSh<op24, op23, 0b10, op11_8, op4, f, itinD32,
+                        OpcodeStr, !strconcat(Dt, "32"),
+                        v2i32, v2i32, IntOp, Commutable>;
+
+  // 128-bit vector types.
+  def v8i16 : N3VQIntSh<op24, op23, 0b01, op11_8, op4, f, itinQ16,
+                        OpcodeStr, !strconcat(Dt, "16"),
+                        v8i16, v8i16, IntOp, Commutable>;
+  def v4i32 : N3VQIntSh<op24, op23, 0b10, op11_8, op4, f, itinQ32,
+                        OpcodeStr, !strconcat(Dt, "32"),
+                        v4i32, v4i32, IntOp, Commutable>;
+}
 
 multiclass N3VIntSL_HS<bits<4> op11_8,
                        InstrItinClass itinD16, InstrItinClass itinD32,
@@ -1966,6 +2005,21 @@ multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                         OpcodeStr, !strconcat(Dt, "8"),
                         v16i8, v16i8, IntOp, Commutable>;
 }
+multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+                        InstrItinClass itinD16, InstrItinClass itinD32,
+                        InstrItinClass itinQ16, InstrItinClass itinQ32,
+                        string OpcodeStr, string Dt,
+                        Intrinsic IntOp, bit Commutable = 0>
+  : N3VInt_HSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
+                OpcodeStr, Dt, IntOp, Commutable> {
+  def v8i8 : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16,
+                       OpcodeStr, !strconcat(Dt, "8"),
+                       v8i8, v8i8, IntOp, Commutable>;
+  def v16i8 : N3VQIntSh<op24, op23, 0b00, op11_8, op4, f, itinQ16,
+                        OpcodeStr, !strconcat(Dt, "8"),
+                        v16i8, v16i8, IntOp, Commutable>;
+}
+
 // ....then also with element size of 64 bits:
 multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                        InstrItinClass itinD16, InstrItinClass itinD32,
@@ -1982,6 +2036,20 @@ multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                        OpcodeStr, !strconcat(Dt, "64"),
                        v2i64, v2i64, IntOp, Commutable>;
 }
+multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+                         InstrItinClass itinD16, InstrItinClass itinD32,
+                         InstrItinClass itinQ16, InstrItinClass itinQ32,
+                         string OpcodeStr, string Dt,
+                         Intrinsic IntOp, bit Commutable = 0>
+  : N3VInt_QHSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
+                 OpcodeStr, Dt, IntOp, Commutable> {
+  def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32,
+                        OpcodeStr, !strconcat(Dt, "64"),
+                        v1i64, v1i64, IntOp, Commutable>;
+  def v2i64 : N3VQIntSh<op24, op23, 0b11, op11_8, op4, f, itinQ32,
+                        OpcodeStr, !strconcat(Dt, "64"),
+                        v2i64, v2i64, IntOp, Commutable>;
+}
 
 // Neon Narrowing 3-register vector intrinsics,
 //   source operand element sizes of 16, 32 and 64 bits:
@@ -3160,10 +3228,10 @@ def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
 // Vector Shifts.
 
 // VSHL : Vector Shift
-defm VSHLs : N3VInt_QHSD<0, 0, 0b0100, 0, N3RegVShFrm,
+defm VSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm,
                          IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                          "vshl", "s", int_arm_neon_vshifts, 0>;
-defm VSHLu : N3VInt_QHSD<1, 0, 0b0100, 0, N3RegVShFrm,
+defm VSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
                          IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                          "vshl", "u", int_arm_neon_vshiftu, 0>;
 // VSHL : Vector Shift Left (Immediate)
diff --git a/test/MC/ARM/neon-shift-encoding.ll b/test/MC/ARM/neon-shift-encoding.ll
new file mode 100644
index 00000000000..0c45b1480c1
--- /dev/null
+++ b/test/MC/ARM/neon-shift-encoding.ll
@@ -0,0 +1,136 @@
+; RUN: llc -show-mc-encoding -march=arm -mcpu=cortex-a8 -mattr=+neon < %s | FileCheck %s
+
+; CHECK: vshls_8xi8
+define <8 x i8> @vshls_8xi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+  %tmp1 = load <8 x i8>* %A
+  %tmp2 = load <8 x i8>* %B
+; CHECK: vshl.u8 d16, d17, d16 @ encoding: [0xa1,0x04,0x40,0xf3]
+  %tmp3 = shl <8 x i8> %tmp1, %tmp2
+  ret <8 x i8> %tmp3
+}
+
+; CHECK: vshls_4xi16
+define <4 x i16> @vshls_4xi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+  %tmp1 = load <4 x i16>* %A
+  %tmp2 = load <4 x i16>* %B
+; CHECK: vshl.u16 d16, d17, d16 @ encoding: [0xa1,0x04,0x50,0xf3]
+  %tmp3 = shl <4 x i16> %tmp1, %tmp2
+  ret <4 x i16> %tmp3
+}
+
+; CHECK: vshls_2xi32
+define <2 x i32> @vshls_2xi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+  %tmp1 = load <2 x i32>* %A
+  %tmp2 = load <2 x i32>* %B
+; CHECK: vshl.u32 d16, d17, d16 @ encoding: [0xa1,0x04,0x60,0xf3]
+  %tmp3 = shl <2 x i32> %tmp1, %tmp2
+  ret <2 x i32> %tmp3
+}
+
+; CHECK: vshls_1xi64
+define <1 x i64> @vshls_1xi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+  %tmp1 = load <1 x i64>* %A
+  %tmp2 = load <1 x i64>* %B
+; CHECK: vshl.u64 d16, d17, d16 @ encoding: [0xa1,0x04,0x70,0xf3]
+  %tmp3 = shl <1 x i64> %tmp1, %tmp2
+  ret <1 x i64> %tmp3
+}
+
+; CHECK: vshli_8xi8
+define <8 x i8> @vshli_8xi8(<8 x i8>* %A) nounwind {
+  %tmp1 = load <8 x i8>* %A
+; CHECK: vshl.i8 d16, d16, #7 @ encoding: [0x30,0x05,0xcf,0xf2]
+  %tmp2 = shl <8 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
+  ret <8 x i8> %tmp2
+}
+
+; CHECK: vshli_4xi16
+define <4 x i16> @vshli_4xi16(<4 x i16>* %A) nounwind {
+  %tmp1 = load <4 x i16>* %A
+; CHECK: vshl.i16 d16, d16, #15 @ encoding: [0x30,0x05,0xdf,0xf2]
+  %tmp2 = shl <4 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15 >
+  ret <4 x i16> %tmp2
+}
+
+; CHECK: vshli_2xi32
+define <2 x i32> @vshli_2xi32(<2 x i32>* %A) nounwind {
+  %tmp1 = load <2 x i32>* %A
+; CHECK: vshl.i32 d16, d16, #31 @ encoding: [0x30,0x05,0xff,0xf2]
+  %tmp2 = shl <2 x i32> %tmp1, < i32 31, i32 31 >
+  ret <2 x i32> %tmp2
+}
+
+; CHECK: vshli_1xi64
+define <1 x i64> @vshli_1xi64(<1 x i64>* %A) nounwind {
+  %tmp1 = load <1 x i64>* %A
+; CHECK: vshl.i64 d16, d16, #63 @ encoding: [0xb0,0x05,0xff,0xf2]
+  %tmp2 = shl <1 x i64> %tmp1, < i64 63 >
+  ret <1 x i64> %tmp2
+}
+
+; CHECK: vshls_16xi8
+define <16 x i8> @vshls_16xi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+  %tmp1 = load <16 x i8>* %A
+  %tmp2 = load <16 x i8>* %B
+; CHECK: vshl.u8 q8, q8, q9 @ encoding: [0xe0,0x04,0x42,0xf3]
+  %tmp3 = shl <16 x i8> %tmp1, %tmp2
+  ret <16 x i8> %tmp3
+}
+
+; CHECK: vshls_8xi16
+define <8 x i16> @vshls_8xi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+  %tmp1 = load <8 x i16>* %A
+  %tmp2 = load <8 x i16>* %B
+  %tmp3 = shl <8 x i16> %tmp1, %tmp2
+  ret <8 x i16> %tmp3
+}
+
+; CHECK: vshls_4xi32
+define <4 x i32> @vshls_4xi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+  %tmp1 = load <4 x i32>* %A
+  %tmp2 = load <4 x i32>* %B
+; CHECK: vshl.u32 q8, q8, q9 @ encoding: [0xe0,0x04,0x62,0xf3]
+  %tmp3 = shl <4 x i32> %tmp1, %tmp2
+  ret <4 x i32> %tmp3
+}
+
+; CHECK: vshls_2xi64
+define <2 x i64> @vshls_2xi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+  %tmp1 = load <2 x i64>* %A
+  %tmp2 = load <2 x i64>* %B
+; CHECK: vshl.u64 q8, q8, q9 @ encoding: [0xe0,0x04,0x72,0xf3]
+  %tmp3 = shl <2 x i64> %tmp1, %tmp2
+  ret <2 x i64> %tmp3
+}
+
+; CHECK: vshli_16xi8
+define <16 x i8> @vshli_16xi8(<16 x i8>* %A) nounwind {
+  %tmp1 = load <16 x i8>* %A
+; CHECK: vshl.i8 q8, q8, #7 @ encoding: [0x70,0x05,0xcf,0xf2]
+  %tmp2 = shl <16 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
+  ret <16 x i8> %tmp2
+}
+
+; CHECK: vshli_8xi16
+define <8 x i16> @vshli_8xi16(<8 x i16>* %A) nounwind {
+  %tmp1 = load <8 x i16>* %A
+; CHECK: vshl.i16 q8, q8, #15 @ encoding: [0x70,0x05,0xdf,0xf2]
+  %tmp2 = shl <8 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >
+  ret <8 x i16> %tmp2
+}
+
+; CHECK: vshli_4xi32
+define <4 x i32> @vshli_4xi32(<4 x i32>* %A) nounwind {
+  %tmp1 = load <4 x i32>* %A
+; CHECK: vshl.i32 q8, q8, #31 @ encoding: [0x70,0x05,0xff,0xf2]
+  %tmp2 = shl <4 x i32> %tmp1, < i32 31, i32 31, i32 31, i32 31 >
+  ret <4 x i32> %tmp2
+}
+
+; CHECK: vshli_2xi64
+define <2 x i64> @vshli_2xi64(<2 x i64>* %A) nounwind {
+  %tmp1 = load <2 x i64>* %A
+; CHECK: vshl.i64 q8, q8, #63 @ encoding: [0xf0,0x05,0xff,0xf2]
+  %tmp2 = shl <2 x i64> %tmp1, < i64 63, i64 63 >
+  ret <2 x i64> %tmp2
+}
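
The new N2VImm operand fields can be checked by hand against the test's expectations. For `vshl.i16 d16, d16, #15` the emitted word is 0xf2df0530 (bytes [0x30,0x05,0xdf,0xf2]): bit 22 together with Inst{15-12} encodes d16 in Vd, bit 5 together with Inst{3-0} encodes d16 in Vm, and Inst{21-16} holds SIMM = 0b011111, the imm6 value that folds the 16-bit element-size prefix (0b01) together with the shift amount 15.

With the shift-aware multiclasses in place, converting the remaining shift-by-register instructions becomes mechanical. As an illustrative sketch only (not part of this patch; the itinerary choices are assumptions), VRSHL, which shares the "Vd = Vm shifted by Vn" operand order, would migrate the same way:

  // Hypothetical follow-up conversion, mirroring the VSHLs/VSHLu change above.
  defm VRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm,
                              IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                              "vrshl", "s", int_arm_neon_vrshifts, 0>;
  defm VRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
                              IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                              "vrshl", "u", int_arm_neon_vrshiftu, 0>;

The separate *Sh classes exist because the shift-by-register encoding is reversed relative to other 3-register NEON instructions: the value being shifted lands in the Vm field and the shift count in Vn, which is also why these defms carry the N3RegVShFrm format tag.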