From 8bb9e48752b4a88e512ceb8fb802e2cdf8150e7b Mon Sep 17 00:00:00 2001
From: Bob Wilson
Date: Sun, 26 Jul 2009 00:39:34 +0000
Subject: [PATCH] Add support for ARM Neon VREV instructions.

Patch by Anton Korzh, with some modifications from me.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@77101 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/ARM/ARMISelLowering.cpp |  24 ++++++
 lib/Target/ARM/ARMISelLowering.h   |   5 ++
 lib/Target/ARM/ARMInstrNEON.td     |  72 ++++++++++++++++++
 test/CodeGen/ARM/vrev.ll           | 113 +++++++++++++++++++++++++++++
 4 files changed, 214 insertions(+)
 create mode 100644 test/CodeGen/ARM/vrev.ll

diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 7e2bbcdc974..226f1618c9a 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -2188,6 +2188,30 @@ SDValue ARM::getVMOVImm(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
                          SplatBitSize, DAG);
 }
 
+/// isVREVMask - Check if a vector shuffle corresponds to a VREV
+/// instruction with the specified blocksize.  (The order of the elements
+/// within each block of the vector is reversed.)
+bool ARM::isVREVMask(ShuffleVectorSDNode *N, unsigned BlockSize) {
+  assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
+         "Only possible block sizes for VREV are: 16, 32, 64");
+
+  MVT VT = N->getValueType(0);
+  unsigned NumElts = VT.getVectorNumElements();
+  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+  unsigned BlockElts = N->getMaskElt(0) + 1;
+
+  if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
+    return false;
+
+  for (unsigned i = 0; i < NumElts; ++i) {
+    if ((unsigned) N->getMaskElt(i) !=
+        (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
+      return false;
+  }
+
+  return true;
+}
+
 static SDValue BuildSplat(SDValue Val, MVT VT, SelectionDAG &DAG, DebugLoc dl) {
   // Canonicalize all-zeros and all-ones vectors.
   ConstantSDNode *ConstVal = dyn_cast<ConstantSDNode>(Val.getNode());
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 10f9cea1a9d..d0806fb9c1d 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -125,6 +125,11 @@ namespace llvm {
     /// return the constant being splatted.  The ByteSize field indicates the
     /// number of bytes of each element [1248].
     SDValue getVMOVImm(SDNode *N, unsigned ByteSize, SelectionDAG &DAG);
+
+    /// isVREVMask - Check if a vector shuffle corresponds to a VREV
+    /// instruction with the specified blocksize.  (The order of the elements
+    /// within each block of the vector is reversed.)
+    bool isVREVMask(ShuffleVectorSDNode *N, unsigned blocksize);
   }
 
 //===--------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 2b7192321dd..9415b40e768 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -1662,6 +1662,78 @@ def VCVTxs2fq : N2VCvtQ<0, 1, 0b000000, 0b1110, 0, 1, "vcvt.f32.s32",
 def VCVTxu2fq : N2VCvtQ<1, 1, 0b000000, 0b1110, 0, 1, "vcvt.f32.u32",
                         v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
 
+// VREV : Vector Reverse
+
+def vrev64_shuffle : PatFrag<(ops node:$in),
+                             (vector_shuffle node:$in, undef), [{
+  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+  return ARM::isVREVMask(SVOp, 64);
+}]>;
+
+def vrev32_shuffle : PatFrag<(ops node:$in),
+                             (vector_shuffle node:$in, undef), [{
+  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+  return ARM::isVREVMask(SVOp, 32);
+}]>;
+
+def vrev16_shuffle : PatFrag<(ops node:$in),
+                             (vector_shuffle node:$in, undef), [{
+  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+  return ARM::isVREVMask(SVOp, 16);
+}]>;
+
+// VREV64 : Vector Reverse elements within 64-bit doublewords
+
+class VREV64D<bits<2> op19_18, string OpcodeStr, ValueType Ty>
+  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$dst),
+        (ins DPR:$src), !strconcat(OpcodeStr, "\t$dst, $src"), "",
+        [(set DPR:$dst, (Ty (vrev64_shuffle (Ty DPR:$src))))]>;
+class VREV64Q<bits<2> op19_18, string OpcodeStr, ValueType Ty>
+  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$dst),
+        (ins QPR:$src), !strconcat(OpcodeStr, "\t$dst, $src"), "",
+        [(set QPR:$dst, (Ty (vrev64_shuffle (Ty QPR:$src))))]>;
+
+def VREV64d8  : VREV64D<0b00, "vrev64.8", v8i8>;
+def VREV64d16 : VREV64D<0b01, "vrev64.16", v4i16>;
+def VREV64d32 : VREV64D<0b10, "vrev64.32", v2i32>;
+def VREV64df  : VREV64D<0b10, "vrev64.32", v2f32>;
+
+def VREV64q8  : VREV64Q<0b00, "vrev64.8", v16i8>;
+def VREV64q16 : VREV64Q<0b01, "vrev64.16", v8i16>;
+def VREV64q32 : VREV64Q<0b10, "vrev64.32", v4i32>;
+def VREV64qf  : VREV64Q<0b10, "vrev64.32", v4f32>;
+
+// VREV32 : Vector Reverse elements within 32-bit words
+
+class VREV32D<bits<2> op19_18, string OpcodeStr, ValueType Ty>
+  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$dst),
+        (ins DPR:$src), !strconcat(OpcodeStr, "\t$dst, $src"), "",
+        [(set DPR:$dst, (Ty (vrev32_shuffle (Ty DPR:$src))))]>;
+class VREV32Q<bits<2> op19_18, string OpcodeStr, ValueType Ty>
+  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$dst),
+        (ins QPR:$src), !strconcat(OpcodeStr, "\t$dst, $src"), "",
+        [(set QPR:$dst, (Ty (vrev32_shuffle (Ty QPR:$src))))]>;
+
+def VREV32d8  : VREV32D<0b00, "vrev32.8", v8i8>;
+def VREV32d16 : VREV32D<0b01, "vrev32.16", v4i16>;
+
+def VREV32q8  : VREV32Q<0b00, "vrev32.8", v16i8>;
+def VREV32q16 : VREV32Q<0b01, "vrev32.16", v8i16>;
+
+// VREV16 : Vector Reverse elements within 16-bit halfwords
+
+class VREV16D<bits<2> op19_18, string OpcodeStr, ValueType Ty>
+  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$dst),
+        (ins DPR:$src), !strconcat(OpcodeStr, "\t$dst, $src"), "",
+        [(set DPR:$dst, (Ty (vrev16_shuffle (Ty DPR:$src))))]>;
+class VREV16Q<bits<2> op19_18, string OpcodeStr, ValueType Ty>
+  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$dst),
+        (ins QPR:$src), !strconcat(OpcodeStr, "\t$dst, $src"), "",
+        [(set QPR:$dst, (Ty (vrev16_shuffle (Ty QPR:$src))))]>;
+
+def VREV16d8 : VREV16D<0b00, "vrev16.8", v8i8>;
+def VREV16q8 : VREV16Q<0b00, "vrev16.8", v16i8>;
+
 //===----------------------------------------------------------------------===//
 // Non-Instruction Patterns
 //===----------------------------------------------------------------------===//
diff --git a/test/CodeGen/ARM/vrev.ll b/test/CodeGen/ARM/vrev.ll
new file mode 100644
index 00000000000..994d89d31c7
--- /dev/null
+++ b/test/CodeGen/ARM/vrev.ll
@@ -0,0 +1,113 @@
+; RUN: llvm-as < %s | llc -march=arm -mattr=+neon | FileCheck %s
+
+define arm_apcscc <8 x i8> @test_vrev64D8(<8 x i8>* %A) nounwind {
+;CHECK: test_vrev64D8:
+;CHECK: vrev64.8
+  %tmp1 = load <8 x i8>* %A
+  %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  ret <8 x i8> %tmp2
+}
+
+define arm_apcscc <4 x i16> @test_vrev64D16(<4 x i16>* %A) nounwind {
+;CHECK: test_vrev64D16:
+;CHECK: vrev64.16
+  %tmp1 = load <4 x i16>* %A
+  %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x i16> %tmp2
+}
+
+define arm_apcscc <2 x i32> @test_vrev64D32(<2 x i32>* %A) nounwind {
+;CHECK: test_vrev64D32:
+;CHECK: vrev64.32
+  %tmp1 = load <2 x i32>* %A
+  %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
+  ret <2 x i32> %tmp2
+}
+
+define arm_apcscc <2 x float> @test_vrev64Df(<2 x float>* %A) nounwind {
+;CHECK: test_vrev64Df:
+;CHECK: vrev64.32
+  %tmp1 = load <2 x float>* %A
+  %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> <i32 1, i32 0>
+  ret <2 x float> %tmp2
+}
+
+define arm_apcscc <16 x i8> @test_vrev64Q8(<16 x i8>* %A) nounwind {
+;CHECK: test_vrev64Q8:
+;CHECK: vrev64.8
+  %tmp1 = load <16 x i8>* %A
+  %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
+  ret <16 x i8> %tmp2
+}
+
+define arm_apcscc <8 x i16> @test_vrev64Q16(<8 x i16>* %A) nounwind {
+;CHECK: test_vrev64Q16:
+;CHECK: vrev64.16
+  %tmp1 = load <8 x i16>* %A
+  %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+  ret <8 x i16> %tmp2
+}
+
+define arm_apcscc <4 x i32> @test_vrev64Q32(<4 x i32>* %A) nounwind {
+;CHECK: test_vrev64Q32:
+;CHECK: vrev64.32
+  %tmp1 = load <4 x i32>* %A
+  %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+  ret <4 x i32> %tmp2
+}
+
+define arm_apcscc <4 x float> @test_vrev64Qf(<4 x float>* %A) nounwind {
+;CHECK: test_vrev64Qf:
+;CHECK: vrev64.32
+  %tmp1 = load <4 x float>* %A
+  %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+  ret <4 x float> %tmp2
+}
+
+define arm_apcscc <8 x i8> @test_vrev32D8(<8 x i8>* %A) nounwind {
+;CHECK: test_vrev32D8:
+;CHECK: vrev32.8
+  %tmp1 = load <8 x i8>* %A
+  %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+  ret <8 x i8> %tmp2
+}
+
+define arm_apcscc <4 x i16> @test_vrev32D16(<4 x i16>* %A) nounwind {
+;CHECK: test_vrev32D16:
+;CHECK: vrev32.16
+  %tmp1 = load <4 x i16>* %A
+  %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+  ret <4 x i16> %tmp2
+}
+
+define arm_apcscc <16 x i8> @test_vrev32Q8(<16 x i8>* %A) nounwind {
+;CHECK: test_vrev32Q8:
+;CHECK: vrev32.8
+  %tmp1 = load <16 x i8>* %A
+  %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+  ret <16 x i8> %tmp2
+}
+
+define arm_apcscc <8 x i16> @test_vrev32Q16(<8 x i16>* %A) nounwind {
+;CHECK: test_vrev32Q16:
+;CHECK: vrev32.16
+  %tmp1 = load <8 x i16>* %A
+  %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+  ret <8 x i16> %tmp2
+}
+
+define arm_apcscc <8 x i8> @test_vrev16D8(<8 x i8>* %A) nounwind {
+;CHECK: test_vrev16D8:
+;CHECK: vrev16.8
+  %tmp1 = load <8 x i8>* %A
+  %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+  ret <8 x i8> %tmp2
+}
+
+define arm_apcscc <16 x i8> @test_vrev16Q8(<16 x i8>* %A) nounwind {
+;CHECK: test_vrev16Q8:
+;CHECK: vrev16.8
+  %tmp1 = load <16 x i8>* %A
+  %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+  ret <16 x i8> %tmp2
+}
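
Below is a minimal standalone sketch, not part of the patch and not an LLVM API, that reproduces the index formula isVREVMask() checks, (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts), so the shufflevector masks in the test file above can be sanity-checked by hand. The helper name expectedVREVMask and the derivation of BlockElts from BlockSize/EltSz are assumptions made for illustration; the patch itself infers BlockElts from the first mask element rather than from the block size.

// vrev_mask_sketch.cpp -- illustration of the VREV shuffle-mask pattern.
// Build: g++ -std=c++11 vrev_mask_sketch.cpp && ./a.out
#include <cassert>
#include <iostream>
#include <vector>

// expectedVREVMask is a hypothetical helper (not an LLVM function). It builds
// the shuffle mask that isVREVMask() accepts for a vector with NumElts
// elements of EltSz bits each, reversed within blocks of BlockSize bits,
// using the same index formula as the patch.
static std::vector<unsigned> expectedVREVMask(unsigned NumElts, unsigned EltSz,
                                              unsigned BlockSize) {
  assert(BlockSize == 16 || BlockSize == 32 || BlockSize == 64);
  assert(BlockSize > EltSz && BlockSize % EltSz == 0);
  unsigned BlockElts = BlockSize / EltSz;   // elements per reversed block
  std::vector<unsigned> Mask(NumElts);
  for (unsigned i = 0; i < NumElts; ++i)
    Mask[i] = (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts);
  return Mask;
}

int main() {
  // VREV64.8 on a <8 x i8> D register: 8-bit elements, 64-bit blocks.
  // Prints 7 6 5 4 3 2 1 0, matching the mask in @test_vrev64D8 above.
  for (unsigned m : expectedVREVMask(8, 8, 64))
    std::cout << m << ' ';
  std::cout << '\n';

  // VREV32.16 on a <8 x i16> Q register: 16-bit elements, 32-bit blocks.
  // Prints 1 0 3 2 5 4 7 6, matching the mask in @test_vrev32Q16 above.
  for (unsigned m : expectedVREVMask(8, 16, 32))
    std::cout << m << ' ';
  std::cout << '\n';
  return 0;
}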