From d9767021f83879429e930b068d1d6aef22285b33 Mon Sep 17 00:00:00 2001 From: Hao Liu <Hao.Liu@arm.com> Date: Thu, 15 Aug 2013 08:26:11 +0000 Subject: [PATCH] Clang and AArch64 backend patches to support shll/shl and vmovl instructions and ACLE functions git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188451 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64ISelLowering.cpp | 52 +++++ lib/Target/AArch64/AArch64ISelLowering.h | 5 +- lib/Target/AArch64/AArch64InstrFormats.td | 19 ++ lib/Target/AArch64/AArch64InstrNEON.td | 145 +++++++++++++ test/CodeGen/AArch64/neon-shift-left-long.ll | 193 ++++++++++++++++++ test/CodeGen/AArch64/neon-shift.ll | 48 +++++ test/MC/AArch64/neon-diagnostics.s | 83 ++++++++ test/MC/AArch64/neon-shift-left-long.s | 37 ++++ test/MC/AArch64/neon-shift.s | 18 ++ .../AArch64/neon-instructions.txt | 23 +++ 10 files changed, 622 insertions(+), 1 deletion(-) create mode 100644 test/CodeGen/AArch64/neon-shift-left-long.ll create mode 100644 test/MC/AArch64/neon-shift-left-long.s diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index ec838fb071f..d12302e685b 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -77,6 +77,7 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) setTargetDAGCombine(ISD::AND); setTargetDAGCombine(ISD::SRA); + setTargetDAGCombine(ISD::SHL); // AArch64 does not have i1 loads, or much of anything for i1 really. setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); @@ -3235,6 +3236,56 @@ static SDValue PerformSRACombine(SDNode *N, DAG.getConstant(LSB + Width - 1, MVT::i64)); } +/// Check if this is a valid build_vector for the immediate operand of +/// a vector shift operation, where all the elements of the build_vector +/// must have the same constant integer value. +static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) { + // Ignore bit_converts. 
+ while (Op.getOpcode() == ISD::BITCAST) + Op = Op.getOperand(0); + BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode()); + APInt SplatBits, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, + HasAnyUndefs, ElementBits) || + SplatBitSize > ElementBits) + return false; + Cnt = SplatBits.getSExtValue(); + return true; +} + +/// Check if this is a valid build_vector for the immediate operand of +/// a vector shift left operation. That value must be in the range: +/// 0 <= Value < ElementBits for a left shift +static bool isVShiftLImm(SDValue Op, EVT VT, int64_t &Cnt) { + assert(VT.isVector() && "vector shift count is not a vector type"); + unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); + if (!getVShiftImm(Op, ElementBits, Cnt)) + return false; + return (Cnt >= 0 && Cnt < ElementBits); +} + +static SDValue PerformSHLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, + const AArch64Subtarget *ST) { + SelectionDAG &DAG = DCI.DAG; + EVT VT = N->getValueType(0); + + // Nothing to be done for scalar shifts. 
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (!VT.isVector() || !TLI.isTypeLegal(VT)) + return SDValue(); + + assert(ST->hasNEON() && "unexpected vector shift"); + int64_t Cnt; + if (isVShiftLImm(N->getOperand(1), VT, Cnt)) { + SDValue RHS = DAG.getNode(AArch64ISD::NEON_DUPIMM, SDLoc(N->getOperand(0)), + VT, DAG.getConstant(Cnt, MVT::i32)); + return DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), RHS); + } + + return SDValue(); +} SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, @@ -3244,6 +3295,7 @@ AArch64TargetLowering::PerformDAGCombine(SDNode *N, case ISD::AND: return PerformANDCombine(N, DCI); case ISD::OR: return PerformORCombine(N, DCI, getSubtarget()); case ISD::SRA: return PerformSRACombine(N, DCI); + case ISD::SHL: return PerformSHLCombine(N, DCI, getSubtarget()); } return SDValue(); } diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h index 67a908e24ef..c9795b249e3 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.h +++ b/lib/Target/AArch64/AArch64ISelLowering.h @@ -132,7 +132,10 @@ namespace AArch64ISD { NEON_CMPZ, // Vector compare bitwise test - NEON_TST + NEON_TST, + + // Operation for the immediate in vector shift + NEON_DUPIMM }; } diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td index 09451fdc45d..020ee6c238b 100644 --- a/lib/Target/AArch64/AArch64InstrFormats.td +++ b/lib/Target/AArch64/AArch64InstrFormats.td @@ -1050,5 +1050,24 @@ class NeonI_2VMisc<bit q, bit u, bits<2> size, bits<5> opcode, // Inherit Rd in 4-0 } +// Format AdvSIMD 2 vector 1 immediate shift +class NeonI_2VShiftImm<bit q, bit u, bits<5> opcode, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRdn<outs, ins, asmstr, patterns, itin> +{ + bits<7> Imm; + let Inst{31} = 0b0; + let Inst{30} = q; + let Inst{29} = u; + let Inst{28-23} = 0b011110; + let Inst{22-16} = Imm; + let Inst{15-11} = opcode; + let 
Inst{10} = 0b1; + + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + } diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td index 98b9e3e1158..175c3aa656b 100644 --- a/lib/Target/AArch64/AArch64InstrNEON.td +++ b/lib/Target/AArch64/AArch64InstrNEON.td @@ -41,6 +41,9 @@ def Neon_cmpz : SDNode<"AArch64ISD::NEON_CMPZ", SDTypeProfile<1, 3, def Neon_tst : SDNode<"AArch64ISD::NEON_TST", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<1, 2>]>>; +def Neon_dupImm : SDNode<"AArch64ISD::NEON_DUPIMM", SDTypeProfile<1, 1, + [SDTCisVec<0>, SDTCisVT<1, i32>]>>; + //===----------------------------------------------------------------------===// // Multiclasses //===----------------------------------------------------------------------===// @@ -1409,6 +1412,148 @@ def FMOVvi_4S : NeonI_FMOV_impl<".4s", VPR128, v4f32, fmov32_operand, 0b1, 0b0>; def FMOVvi_2D : NeonI_FMOV_impl<".2d", VPR128, v2f64, fmov64_operand, 0b1, 0b1>; } +// Vector Shift (Immediate) + +def imm0_63 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 64; }]> { + let ParserMatchClass = uimm6_asmoperand; +} + +class N2VShiftLeft<bit q, bit u, bits<5> opcode, string asmop, string T, + RegisterClass VPRC, ValueType Ty, Operand ImmTy> + : NeonI_2VShiftImm<q, u, opcode, + (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm), + asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm", + [(set (Ty VPRC:$Rd), + (Ty (shl (Ty VPRC:$Rn), + (Ty (Neon_dupImm (i32 imm:$Imm))))))], + NoItinerary>; + +multiclass NeonI_N2VShL<bit u, bits<5> opcode, string asmop> { + // 64-bit vector types. + def _8B : N2VShiftLeft<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3> { + let Inst{22-19} = 0b0001; // immh:immb = 0001xxx + } + + def _4H : N2VShiftLeft<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4> { + let Inst{22-20} = 0b001; // immh:immb = 001xxxx + } + + def _2S : N2VShiftLeft<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5> { + let Inst{22-21} = 0b01; // immh:immb = 01xxxxx + } + + // 128-bit vector types. 
+ def _16B : N2VShiftLeft<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3> { + let Inst{22-19} = 0b0001; // immh:immb = 0001xxx + } + + def _8H : N2VShiftLeft<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4> { + let Inst{22-20} = 0b001; // immh:immb = 001xxxx + } + + def _4S : N2VShiftLeft<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5> { + let Inst{22-21} = 0b01; // immh:immb = 01xxxxx + } + + def _2D : N2VShiftLeft<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63> { + let Inst{22} = 0b1; // immh:immb = 1xxxxxx + } +} + +def Neon_top16B : PatFrag<(ops node:$in), + (extract_subvector (v16i8 node:$in), (iPTR 8))>; +def Neon_top8H : PatFrag<(ops node:$in), + (extract_subvector (v8i16 node:$in), (iPTR 4))>; +def Neon_top4S : PatFrag<(ops node:$in), + (extract_subvector (v4i32 node:$in), (iPTR 2))>; + +class N2VShiftLong<bit q, bit u, bits<5> opcode, string asmop, string DestT, + string SrcT, ValueType DestTy, ValueType SrcTy, + Operand ImmTy, SDPatternOperator ExtOp> + : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd), + (ins VPR64:$Rn, ImmTy:$Imm), + asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm", + [(set (DestTy VPR128:$Rd), + (DestTy (shl + (DestTy (ExtOp (SrcTy VPR64:$Rn))), + (DestTy (Neon_dupImm (i32 imm:$Imm))))))], + NoItinerary>; + +class N2VShiftLongHigh<bit q, bit u, bits<5> opcode, string asmop, string DestT, + string SrcT, ValueType DestTy, ValueType SrcTy, + int StartIndex, Operand ImmTy, + SDPatternOperator ExtOp, PatFrag getTop> + : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd), + (ins VPR128:$Rn, ImmTy:$Imm), + asmop # "2\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm", + [(set (DestTy VPR128:$Rd), + (DestTy (shl + (DestTy (ExtOp + (SrcTy (getTop VPR128:$Rn)))), + (DestTy (Neon_dupImm (i32 imm:$Imm))))))], + NoItinerary>; + +multiclass NeonI_N2VShLL<string prefix, bit u, bits<5> opcode, string asmop, + SDNode ExtOp> { + // 64-bit vector types. 
+ def _8B : N2VShiftLong<0b0, u, opcode, asmop, "8h", "8b", v8i16, v8i8, + uimm3, ExtOp>{ + let Inst{22-19} = 0b0001; // immh:immb = 0001xxx + } + + def _4H : N2VShiftLong<0b0, u, opcode, asmop, "4s", "4h", v4i32, v4i16, + uimm4, ExtOp>{ + let Inst{22-20} = 0b001; // immh:immb = 001xxxx + } + + def _2S : N2VShiftLong<0b0, u, opcode, asmop, "2d", "2s", v2i64, v2i32, + uimm5, ExtOp>{ + let Inst{22-21} = 0b01; // immh:immb = 01xxxxx + } + + // 128-bit vector types + def _16B : N2VShiftLongHigh<0b1, u, opcode, asmop, "8h", "16b", + v8i16, v8i8, 8, uimm3, ExtOp, Neon_top16B>{ + let Inst{22-19} = 0b0001; // immh:immb = 0001xxx + } + + def _8H : N2VShiftLongHigh<0b1, u, opcode, asmop, "4s", "8h", + v4i32, v4i16, 4, uimm4, ExtOp, Neon_top8H>{ + let Inst{22-20} = 0b001; // immh:immb = 001xxxx + } + + def _4S : N2VShiftLongHigh<0b1, u, opcode, asmop, "2d", "4s", + v2i64, v2i32, 2, uimm5, ExtOp, Neon_top4S>{ + let Inst{22-21} = 0b01; // immh:immb = 01xxxxx + } + + // Use other patterns to match when the immediate is 0. 
+ def : Pat<(v8i16 (ExtOp (v8i8 VPR64:$Rn))), + (!cast<Instruction>(prefix # "_8B") VPR64:$Rn, 0)>; + + def : Pat<(v4i32 (ExtOp (v4i16 VPR64:$Rn))), + (!cast<Instruction>(prefix # "_4H") VPR64:$Rn, 0)>; + + def : Pat<(v2i64 (ExtOp (v2i32 VPR64:$Rn))), + (!cast<Instruction>(prefix # "_2S") VPR64:$Rn, 0)>; + + def : Pat<(v8i16 (ExtOp (v8i8 (Neon_top16B VPR128:$Rn)))), + (!cast<Instruction>(prefix # "_16B") VPR128:$Rn, 0)>; + + def : Pat<(v4i32 (ExtOp (v4i16 (Neon_top8H VPR128:$Rn)))), + (!cast<Instruction>(prefix # "_8H") VPR128:$Rn, 0)>; + + def : Pat<(v2i64 (ExtOp (v2i32 (Neon_top4S VPR128:$Rn)))), + (!cast<Instruction>(prefix # "_4S") VPR128:$Rn, 0)>; +} + +// Shift left immediate +defm SHLvvi : NeonI_N2VShL<0b0, 0b01010, "shl">; + +// Shift left long immediate +defm SSHLLvvi : NeonI_N2VShLL<"SSHLLvvi", 0b0, 0b10100, "sshll", sext>; +defm USHLLvvi : NeonI_N2VShLL<"USHLLvvi", 0b1, 0b10100, "ushll", zext>; + // Scalar Arithmetic class NeonI_Scalar3Same_D_size<bit u, bits<5> opcode, string asmop> diff --git a/test/CodeGen/AArch64/neon-shift-left-long.ll b/test/CodeGen/AArch64/neon-shift-left-long.ll new file mode 100644 index 00000000000..d45c47685b0 --- /dev/null +++ b/test/CodeGen/AArch64/neon-shift-left-long.ll @@ -0,0 +1,193 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s + +define <8 x i16> @test_sshll_v8i8(<8 x i8> %a) { +; CHECK: test_sshll_v8i8: +; CHECK: sshll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #3 + %1 = sext <8 x i8> %a to <8 x i16> + %tmp = shl <8 x i16> %1, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> + ret <8 x i16> %tmp +} + +define <4 x i32> @test_sshll_v4i16(<4 x i16> %a) { +; CHECK: test_sshll_v4i16: +; CHECK: sshll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #9 + %1 = sext <4 x i16> %a to <4 x i32> + %tmp = shl <4 x i32> %1, <i32 9, i32 9, i32 9, i32 9> + ret <4 x i32> %tmp +} + +define <2 x i64> @test_sshll_v2i32(<2 x i32> %a) { +; CHECK: test_sshll_v2i32: +; CHECK: sshll {{v[0-9]+}}.2d, 
{{v[0-9]+}}.2s, #19 + %1 = sext <2 x i32> %a to <2 x i64> + %tmp = shl <2 x i64> %1, <i64 19, i64 19> + ret <2 x i64> %tmp +} + +define <8 x i16> @test_ushll_v8i8(<8 x i8> %a) { +; CHECK: test_ushll_v8i8: +; CHECK: ushll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #3 + %1 = zext <8 x i8> %a to <8 x i16> + %tmp = shl <8 x i16> %1, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> + ret <8 x i16> %tmp +} + +define <4 x i32> @test_ushll_v4i16(<4 x i16> %a) { +; CHECK: test_ushll_v4i16: +; CHECK: ushll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #9 + %1 = zext <4 x i16> %a to <4 x i32> + %tmp = shl <4 x i32> %1, <i32 9, i32 9, i32 9, i32 9> + ret <4 x i32> %tmp +} + +define <2 x i64> @test_ushll_v2i32(<2 x i32> %a) { +; CHECK: test_ushll_v2i32: +; CHECK: ushll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #19 + %1 = zext <2 x i32> %a to <2 x i64> + %tmp = shl <2 x i64> %1, <i64 19, i64 19> + ret <2 x i64> %tmp +} + +define <8 x i16> @test_sshll2_v16i8(<16 x i8> %a) { +; CHECK: test_sshll2_v16i8: +; CHECK: sshll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #3 + %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %2 = sext <8 x i8> %1 to <8 x i16> + %tmp = shl <8 x i16> %2, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> + ret <8 x i16> %tmp +} + +define <4 x i32> @test_sshll2_v8i16(<8 x i16> %a) { +; CHECK: test_sshll2_v8i16: +; CHECK: sshll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #9 + %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %2 = sext <4 x i16> %1 to <4 x i32> + %tmp = shl <4 x i32> %2, <i32 9, i32 9, i32 9, i32 9> + ret <4 x i32> %tmp +} + +define <2 x i64> @test_sshll2_v4i32(<4 x i32> %a) { +; CHECK: test_sshll2_v4i32: +; CHECK: sshll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #19 + %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %2 = sext <2 x i32> %1 to <2 x i64> + %tmp = shl <2 x i64> %2, <i64 19, i64 19> + ret <2 x i64> %tmp +} + +define <8 x i16> 
@test_ushll2_v16i8(<16 x i8> %a) { +; CHECK: test_ushll2_v16i8: +; CHECK: ushll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #3 + %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %2 = zext <8 x i8> %1 to <8 x i16> + %tmp = shl <8 x i16> %2, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> + ret <8 x i16> %tmp +} + +define <4 x i32> @test_ushll2_v8i16(<8 x i16> %a) { +; CHECK: test_ushll2_v8i16: +; CHECK: ushll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #9 + %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %2 = zext <4 x i16> %1 to <4 x i32> + %tmp = shl <4 x i32> %2, <i32 9, i32 9, i32 9, i32 9> + ret <4 x i32> %tmp +} + +define <2 x i64> @test_ushll2_v4i32(<4 x i32> %a) { +; CHECK: test_ushll2_v4i32: +; CHECK: ushll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #19 + %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %2 = zext <2 x i32> %1 to <2 x i64> + %tmp = shl <2 x i64> %2, <i64 19, i64 19> + ret <2 x i64> %tmp +} + +define <8 x i16> @test_sshll_shl0_v8i8(<8 x i8> %a) { +; CHECK: test_sshll_shl0_v8i8: +; CHECK: sshll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #0 + %tmp = sext <8 x i8> %a to <8 x i16> + ret <8 x i16> %tmp +} + +define <4 x i32> @test_sshll_shl0_v4i16(<4 x i16> %a) { +; CHECK: test_sshll_shl0_v4i16: +; CHECK: sshll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #0 + %tmp = sext <4 x i16> %a to <4 x i32> + ret <4 x i32> %tmp +} + +define <2 x i64> @test_sshll_shl0_v2i32(<2 x i32> %a) { +; CHECK: test_sshll_shl0_v2i32: +; CHECK: sshll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #0 + %tmp = sext <2 x i32> %a to <2 x i64> + ret <2 x i64> %tmp +} + +define <8 x i16> @test_ushll_shl0_v8i8(<8 x i8> %a) { +; CHECK: test_ushll_shl0_v8i8: +; CHECK: ushll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #0 + %tmp = zext <8 x i8> %a to <8 x i16> + ret <8 x i16> %tmp +} + +define <4 x i32> @test_ushll_shl0_v4i16(<4 x i16> %a) { +; CHECK: test_ushll_shl0_v4i16: +; CHECK: ushll {{v[0-9]+}}.4s, 
{{v[0-9]+}}.4h, #0 + %tmp = zext <4 x i16> %a to <4 x i32> + ret <4 x i32> %tmp +} + +define <2 x i64> @test_ushll_shl0_v2i32(<2 x i32> %a) { +; CHECK: test_ushll_shl0_v2i32: +; CHECK: ushll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #0 + %tmp = zext <2 x i32> %a to <2 x i64> + ret <2 x i64> %tmp +} + +define <8 x i16> @test_sshll2_shl0_v16i8(<16 x i8> %a) { +; CHECK: test_sshll2_shl0_v16i8: +; CHECK: sshll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #0 + %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %tmp = sext <8 x i8> %1 to <8 x i16> + ret <8 x i16> %tmp +} + +define <4 x i32> @test_sshll2_shl0_v8i16(<8 x i16> %a) { +; CHECK: test_sshll2_shl0_v8i16: +; CHECK: sshll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #0 + %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %tmp = sext <4 x i16> %1 to <4 x i32> + ret <4 x i32> %tmp +} + +define <2 x i64> @test_sshll2_shl0_v4i32(<4 x i32> %a) { +; CHECK: test_sshll2_shl0_v4i32: +; CHECK: sshll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #0 + %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %tmp = sext <2 x i32> %1 to <2 x i64> + ret <2 x i64> %tmp +} + +define <8 x i16> @test_ushll2_shl0_v16i8(<16 x i8> %a) { +; CHECK: test_ushll2_shl0_v16i8: +; CHECK: ushll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #0 + %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %tmp = zext <8 x i8> %1 to <8 x i16> + ret <8 x i16> %tmp +} + +define <4 x i32> @test_ushll2_shl0_v8i16(<8 x i16> %a) { +; CHECK: test_ushll2_shl0_v8i16: +; CHECK: ushll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #0 + %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %tmp = zext <4 x i16> %1 to <4 x i32> + ret <4 x i32> %tmp +} + +define <2 x i64> @test_ushll2_shl0_v4i32(<4 x i32> %a) { +; CHECK: test_ushll2_shl0_v4i32: +; CHECK: ushll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #0 + %1 
= shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %tmp = zext <2 x i32> %1 to <2 x i64> + ret <2 x i64> %tmp +} diff --git a/test/CodeGen/AArch64/neon-shift.ll b/test/CodeGen/AArch64/neon-shift.ll index 45a26057996..9b11ba858e9 100644 --- a/test/CodeGen/AArch64/neon-shift.ll +++ b/test/CodeGen/AArch64/neon-shift.ll @@ -137,4 +137,52 @@ define <2 x i64> @test_sshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { } +define <8 x i8> @test_shl_v8i8(<8 x i8> %a) { +; CHECK: test_shl_v8i8: +; CHECK: shl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 + %tmp = shl <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> + ret <8 x i8> %tmp +} + +define <4 x i16> @test_shl_v4i16(<4 x i16> %a) { +; CHECK: test_shl_v4i16: +; CHECK: shl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 + %tmp = shl <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3> + ret <4 x i16> %tmp +} + +define <2 x i32> @test_shl_v2i32(<2 x i32> %a) { +; CHECK: test_shl_v2i32: +; CHECK: shl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 + %tmp = shl <2 x i32> %a, <i32 3, i32 3> + ret <2 x i32> %tmp +} + +define <16 x i8> @test_shl_v16i8(<16 x i8> %a) { +; CHECK: test_shl_v16i8: +; CHECK: shl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 + %tmp = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> + ret <16 x i8> %tmp +} + +define <8 x i16> @test_shl_v8i16(<8 x i16> %a) { +; CHECK: test_shl_v8i16: +; CHECK: shl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 + %tmp = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> + ret <8 x i16> %tmp +} + +define <4 x i32> @test_shl_v4i32(<4 x i32> %a) { +; CHECK: test_shl_v4i32: +; CHECK: shl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 + %tmp = shl <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3> + ret <4 x i32> %tmp +} + +define <2 x i64> @test_shl_v2i64(<2 x i64> %a) { +; CHECK: test_shl_v2i64: +; CHECK: shl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 + %tmp = shl <2 x i64> %a, <i64 3, i64 3> + ret <2 x i64> %tmp +} diff --git 
a/test/MC/AArch64/neon-diagnostics.s b/test/MC/AArch64/neon-diagnostics.s index 5373889222f..bc54b50eb2e 100644 --- a/test/MC/AArch64/neon-diagnostics.s +++ b/test/MC/AArch64/neon-diagnostics.s @@ -1205,3 +1205,86 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fmulx v1.4h, v25.4h, v3.4h // CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector Shift Left by Immediate +//------------------------------------------------------------------------------ + // Mismatched vector types and out of range + shl v0.4s, v15,2s, #3 + shl v0.2d, v17.4s, #3 + shl v0.8b, v31.8b, #-1 + shl v0.8b, v31.8b, #8 + shl v0.4s, v21.4s, #32 + shl v0.2d, v1.2d, #64 + +// CHECK-ERROR: error: expected comma before next operand +// CHECK-ERROR: shl v0.4s, v15,2s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: shl v0.2d, v17.4s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 7] +// CHECK-ERROR: shl v0.8b, v31.8b, #-1 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 7] +// CHECK-ERROR: shl v0.8b, v31.8b, #8 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 31] +// CHECK-ERROR: shl v0.4s, v21.4s, #32 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 63] +// CHECK-ERROR: shl v0.2d, v1.2d, #64 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Vector Shift Left Long by Immediate +//---------------------------------------------------------------------- + // Mismatched vector types + sshll v0.4s, v15.2s, #3 + ushll v1.16b, v25.16b, #6 + sshll2 v0.2d, v3.8s, #15 + ushll2 v1.4s, v25.4s, #7 + + // Out of range + sshll v0.8h, v1.8b, #-1 + sshll v0.8h, v1.8b, #9 + ushll v0.4s, v1.4h, #17 + ushll v0.2d, v1.2s, #33 + sshll2 v0.8h, v1.16b, #9 + sshll2 v0.4s, v1.8h, #17 + ushll2 v0.2d, v1.4s, #33 + +// CHECK-ERROR: error: invalid operand for 
instruction +// CHECK-ERROR: sshll v0.4s, v15.2s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ushll v1.16b, v25.16b, #6 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sshll2 v0.2d, v3.8s, #15 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ushll2 v1.4s, v25.4s, #7 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 7] +// CHECK-ERROR: sshll v0.8h, v1.8b, #-1 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 7] +// CHECK-ERROR: sshll v0.8h, v1.8b, #9 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 15] +// CHECK-ERROR: ushll v0.4s, v1.4h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 31] +// CHECK-ERROR: ushll v0.2d, v1.2s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 7] +// CHECK-ERROR: sshll2 v0.8h, v1.16b, #9 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 15] +// CHECK-ERROR: sshll2 v0.4s, v1.8h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 31] +// CHECK-ERROR: ushll2 v0.2d, v1.4s, #33 +// CHECK-ERROR: ^ + diff --git a/test/MC/AArch64/neon-shift-left-long.s b/test/MC/AArch64/neon-shift-left-long.s new file mode 100644 index 00000000000..97604587424 --- /dev/null +++ b/test/MC/AArch64/neon-shift-left-long.s @@ -0,0 +1,37 @@ +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s + +// Check that the assembler can handle the documented syntax for AArch64 + +//------------------------------------------------------------------------------ +// Integer shift left long (Signed) +//------------------------------------------------------------------------------ + sshll v0.8h, v1.8b, #3 + sshll v0.4s, v1.4h, #3 + sshll v0.2d, v1.2s, #3 + sshll2 v0.8h, v1.16b, #3 + sshll2 v0.4s, v1.8h, #3 + sshll2 v0.2d, v1.4s, 
#3 + +// CHECK: sshll v0.8h, v1.8b, #3 // encoding: [0x20,0xa4,0x0b,0x0f] +// CHECK: sshll v0.4s, v1.4h, #3 // encoding: [0x20,0xa4,0x13,0x0f] +// CHECK: sshll v0.2d, v1.2s, #3 // encoding: [0x20,0xa4,0x23,0x0f] +// CHECK: sshll2 v0.8h, v1.16b, #3 // encoding: [0x20,0xa4,0x0b,0x4f] +// CHECK: sshll2 v0.4s, v1.8h, #3 // encoding: [0x20,0xa4,0x13,0x4f] +// CHECK: sshll2 v0.2d, v1.4s, #3 // encoding: [0x20,0xa4,0x23,0x4f] + +//------------------------------------------------------------------------------ +// Integer shift left long (Unsigned) +//------------------------------------------------------------------------------ + ushll v0.8h, v1.8b, #3 + ushll v0.4s, v1.4h, #3 + ushll v0.2d, v1.2s, #3 + ushll2 v0.8h, v1.16b, #3 + ushll2 v0.4s, v1.8h, #3 + ushll2 v0.2d, v1.4s, #3 + +// CHECK: ushll v0.8h, v1.8b, #3 // encoding: [0x20,0xa4,0x0b,0x2f] +// CHECK: ushll v0.4s, v1.4h, #3 // encoding: [0x20,0xa4,0x13,0x2f] +// CHECK: ushll v0.2d, v1.2s, #3 // encoding: [0x20,0xa4,0x23,0x2f] +// CHECK: ushll2 v0.8h, v1.16b, #3 // encoding: [0x20,0xa4,0x0b,0x6f] +// CHECK: ushll2 v0.4s, v1.8h, #3 // encoding: [0x20,0xa4,0x13,0x6f] +// CHECK: ushll2 v0.2d, v1.4s, #3 // encoding: [0x20,0xa4,0x23,0x6f] diff --git a/test/MC/AArch64/neon-shift.s b/test/MC/AArch64/neon-shift.s index be1799e2c11..23d687c38c9 100644 --- a/test/MC/AArch64/neon-shift.s +++ b/test/MC/AArch64/neon-shift.s @@ -55,3 +55,21 @@ // CHECK: ushl d17, d31, d8 // encoding: [0xf1,0x47,0xe8,0x7e] +//------------------------------------------------------------------------------ +// Vector Integer Shift Left by Immediate +//------------------------------------------------------------------------------ + shl v0.8b, v1.8b, #3 + shl v0.4h, v1.4h, #3 + shl v0.2s, v1.2s, #3 + shl v0.16b, v1.16b, #3 + shl v0.8h, v1.8h, #3 + shl v0.4s, v1.4s, #3 + shl v0.2d, v1.2d, #3 + +// CHECK: shl v0.8b, v1.8b, #3 // encoding: [0x20,0x54,0x0b,0x0f] +// CHECK: shl v0.4h, v1.4h, #3 // encoding: [0x20,0x54,0x13,0x0f] +// CHECK: shl v0.2s, v1.2s, 
#3 // encoding: [0x20,0x54,0x23,0x0f] +// CHECK: shl v0.16b, v1.16b, #3 // encoding: [0x20,0x54,0x0b,0x4f] +// CHECK: shl v0.8h, v1.8h, #3 // encoding: [0x20,0x54,0x13,0x4f] +// CHECK: shl v0.4s, v1.4s, #3 // encoding: [0x20,0x54,0x23,0x4f] +// CHECK: shl v0.2d, v1.2d, #3 // encoding: [0x20,0x54,0x43,0x4f] diff --git a/test/MC/Disassembler/AArch64/neon-instructions.txt b/test/MC/Disassembler/AArch64/neon-instructions.txt index 40d1f4c66f8..e599abaacd0 100644 --- a/test/MC/Disassembler/AArch64/neon-instructions.txt +++ b/test/MC/Disassembler/AArch64/neon-instructions.txt @@ -671,3 +671,26 @@ 0xf5 0xdd 0x23 0x4e 0xab 0xdc 0x77 0x4e +#---------------------------------------------------------------------- +# Vector Shift Left by Immediate +#---------------------------------------------------------------------- +# CHECK: shl v0.4h, v1.4h, #3 +# CHECK: shl v0.16b, v1.16b, #3 +# CHECK: shl v0.4s, v1.4s, #3 +# CHECK: shl v0.2d, v1.2d, #3 +0x20,0x54,0x13,0x0f +0x20,0x54,0x0b,0x4f +0x20,0x54,0x23,0x4f +0x20,0x54,0x43,0x4f + +#---------------------------------------------------------------------- +# Vector Shift Left Long (Signed, Unsigned) by Immediate +#---------------------------------------------------------------------- +# CHECK: sshll v0.2d, v1.2s, #3 +# CHECK: sshll2 v0.4s, v1.8h, #3 +# CHECK: ushll v0.4s, v1.4h, #3 +# CHECK: ushll2 v0.8h, v1.16b, #3 +0x20 0xa4 0x23 0x0f +0x20 0xa4 0x13 0x4f +0x20 0xa4 0x13 0x2f +0x20 0xa4 0x0b 0x6f