From 0e2037ba2baed90310f7ba21c4557eb49da05938 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Fri, 20 Jan 2012 05:53:00 +0000
Subject: [PATCH] Add support for selecting 256-bit PALIGNR.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@148532 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp | 96 ++++++++++++++++++++++--------
 lib/Target/X86/X86InstrSSE.td      | 11 ++++
 test/CodeGen/X86/avx2-palignr.ll   | 57 ++++++++++++++++++
 3 files changed, 139 insertions(+), 25 deletions(-)
 create mode 100644 test/CodeGen/X86/avx2-palignr.ll

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 60ecf3f43f6..808daffc695 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -3253,35 +3253,74 @@ bool X86::isPSHUFLWMask(ShuffleVectorSDNode *N) {
 
 /// isPALIGNRMask - Return true if the node specifies a shuffle of elements that
 /// is suitable for input to PALIGNR.
-static bool isPALIGNRMask(ArrayRef<int> Mask, EVT VT, bool hasSSSE3) {
-  int i, e = VT.getVectorNumElements();
-  if (VT.getSizeInBits() != 128)
+static bool isPALIGNRMask(ArrayRef<int> Mask, EVT VT,
+                          const X86Subtarget *Subtarget) {
+  if ((VT.getSizeInBits() == 128 && !Subtarget->hasSSSE3()) ||
+      (VT.getSizeInBits() == 256 && !Subtarget->hasAVX2()))
     return false;
 
-  // Do not handle v2i64 / v2f64 shuffles with palignr.
-  if (e < 4 || !hasSSSE3)
+  unsigned NumElts = VT.getVectorNumElements();
+  unsigned NumLanes = VT.getSizeInBits()/128;
+  unsigned NumLaneElts = NumElts/NumLanes;
+
+  // Do not handle 64-bit element shuffles with palignr.
+  if (NumLaneElts == 2)
     return false;
 
-  for (i = 0; i != e; ++i)
-    if (Mask[i] >= 0)
-      break;
+  for (unsigned l = 0; l != NumElts; l+=NumLaneElts) {
+    unsigned i;
+    for (i = 0; i != NumLaneElts; ++i) {
+      if (Mask[i+l] >= 0)
+        break;
+    }
 
-  // All undef, not a palignr.
-  if (i == e)
-    return false;
+    // Lane is all undef, go to next lane.
+    if (i == NumLaneElts)
+      continue;
 
-  // Make sure we're shifting in the right direction.
-  if (Mask[i] <= i)
-    return false;
+    int Start = Mask[i+l];
 
-  int s = Mask[i] - i;
-
-  // Check the rest of the elements to see if they are consecutive.
-  for (++i; i != e; ++i) {
-    int m = Mask[i];
-    if (m >= 0 && m != s+i)
+    // Make sure it's in this lane in one of the sources.
+    if (!isUndefOrInRange(Start, l, l+NumLaneElts) &&
+        !isUndefOrInRange(Start, l+NumElts, l+NumElts+NumLaneElts))
       return false;
+
+    // If not lane 0, then we must match lane 0.
+    if (l != 0 && Mask[i] >= 0 && !isUndefOrEqual(Start, Mask[i]+l))
+      return false;
+
+    // Correct second source to be contiguous with first source.
+    if (Start >= (int)NumElts)
+      Start -= NumElts - NumLaneElts;
+
+    // Make sure we're shifting in the right direction.
+    if (Start <= (int)(i+l))
+      return false;
+
+    Start -= i;
+
+    // Check the rest of the elements to see if they are consecutive.
+    for (++i; i != NumLaneElts; ++i) {
+      int Idx = Mask[i+l];
+
+      // Make sure it's in this lane.
+      if (!isUndefOrInRange(Idx, l, l+NumLaneElts) &&
+          !isUndefOrInRange(Idx, l+NumElts, l+NumElts+NumLaneElts))
+        return false;
+
+      // If not lane 0, then we must match lane 0.
+      if (l != 0 && Mask[i] >= 0 && !isUndefOrEqual(Idx, Mask[i]+l))
+        return false;
+
+      if (Idx >= (int)NumElts)
+        Idx -= NumElts - NumLaneElts;
+
+      if (!isUndefOrEqual(Idx, Start+i))
+        return false;
+
+    }
   }
+
   return true;
 }
@@ -3983,14 +4022,21 @@ unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
 static unsigned getShufflePALIGNRImmediate(ShuffleVectorSDNode *SVOp) {
   EVT VT = SVOp->getValueType(0);
   unsigned EltSize = VT.getVectorElementType().getSizeInBits() >> 3;
-  int Val = 0;
-  unsigned i, e;
-  for (i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
+  unsigned NumElts = VT.getVectorNumElements();
+  unsigned NumLanes = VT.getSizeInBits()/128;
+  unsigned NumLaneElts = NumElts/NumLanes;
+
+  int Val = 0;
+  unsigned i;
+  for (i = 0; i != NumElts; ++i) {
     Val = SVOp->getMaskElt(i);
     if (Val >= 0)
       break;
   }
+
+  if (Val >= (int)NumElts)
+    Val -= NumElts - NumLaneElts;
+
   assert(Val - i > 0 && "PALIGNR imm should be positive");
   return (Val - i) * EltSize;
 }
@@ -6626,7 +6672,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
   // inlined here right now to enable us to directly emit target specific
   // nodes, and remove one by one until they don't return Op anymore.
 
-  if (isPALIGNRMask(M, VT, Subtarget->hasSSSE3()))
+  if (isPALIGNRMask(M, VT, Subtarget))
     return getTargetShuffleNode(X86ISD::PALIGN, dl, VT, V1, V2,
                                 getShufflePALIGNRImmediate(SVOp), DAG);
@@ -11089,7 +11135,7 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
           isPSHUFDMask(M, VT) ||
           isPSHUFHWMask(M, VT) ||
           isPSHUFLWMask(M, VT) ||
-          isPALIGNRMask(M, VT, Subtarget->hasSSSE3()) ||
+          isPALIGNRMask(M, VT, Subtarget) ||
           isUNPCKLMask(M, VT, Subtarget->hasAVX2()) ||
           isUNPCKHMask(M, VT, Subtarget->hasAVX2()) ||
           isUNPCKL_v_undef_Mask(M, VT, Subtarget->hasAVX2()) ||
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index e5f064aec71..e77c254cd7c 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -5476,6 +5476,17 @@ let Predicates = [HasAVX2] in
 let Constraints = "$src1 = $dst", Predicates = [HasSSSE3] in
   defm PALIGN : ssse3_palign<"palignr">;
 
+let Predicates = [HasAVX2] in {
+def : Pat<(v8i32 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+          (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
+def : Pat<(v8f32 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+          (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
+def : Pat<(v16i16 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+          (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
+def : Pat<(v32i8 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+          (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
+}
+
 let Predicates = [HasAVX] in {
 def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
           (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
diff --git a/test/CodeGen/X86/avx2-palignr.ll b/test/CodeGen/X86/avx2-palignr.ll
new file mode 100644
index 00000000000..53b9da32ae8
--- /dev/null
+++ b/test/CodeGen/X86/avx2-palignr.ll
@@ -0,0 +1,57 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
+
+define <8 x i32> @test1(<8 x i32> %A, <8 x i32> %B) nounwind {
+; CHECK: test1:
+; CHECK: vpalignr $4
+  %C = shufflevector <8 x i32> %A, <8 x i32> %B, <8 x i32> <i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 7, i32 12>
+  ret <8 x i32> %C
+}
+
+define <8 x i32> @test2(<8 x i32> %A, <8 x i32> %B) nounwind {
+; CHECK: test2:
+; CHECK: vpalignr $4
+  %C = shufflevector <8 x i32> %A, <8 x i32> %B, <8 x i32> <i32 1, i32 2, i32 3, i32 undef, i32 5, i32 6, i32 7, i32 undef>
+  ret <8 x i32> %C
+}
+
+define <8 x i32> @test3(<8 x i32> %A, <8 x i32> %B) nounwind {
+; CHECK: test3:
+; CHECK: vpalignr $4
+  %C = shufflevector <8 x i32> %A, <8 x i32> %B, <8 x i32> <i32 1, i32 undef, i32 3, i32 8, i32 5, i32 6, i32 7, i32 12>
+  ret <8 x i32> %C
+}
+;
+define <8 x i32> @test4(<8 x i32> %A, <8 x i32> %B) nounwind {
+; CHECK: test4:
+; CHECK: vpalignr $8
+  %C = shufflevector <8 x i32> %A, <8 x i32> %B, <8 x i32> <i32 2, i32 3, i32 8, i32 9, i32 6, i32 7, i32 12, i32 13>
+  ret <8 x i32> %C
+}
+
+define <16 x i16> @test5(<16 x i16> %A, <16 x i16> %B) nounwind {
+; CHECK: test5:
+; CHECK: vpalignr $6
+  %C = shufflevector <16 x i16> %A, <16 x i16> %B, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 11, i32 12, i32 13, i32 14, i32 15, i32 24, i32 25, i32 26>
+  ret <16 x i16> %C
+}
+
+define <16 x i16> @test6(<16 x i16> %A, <16 x i16> %B) nounwind {
+; CHECK: test6:
+; CHECK: vpalignr $6
+  %C = shufflevector <16 x i16> %A, <16 x i16> %B, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef>
+  ret <16 x i16> %C
+}
+
+define <16 x i16> @test7(<16 x i16> %A, <16 x i16> %B) nounwind {
+; CHECK: test7:
+; CHECK: vpalignr $6
+  %C = shufflevector <16 x i16> %A, <16 x i16> %B, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  ret <16 x i16> %C
+}
+
+define <32 x i8> @test8(<32 x i8> %A, <32 x i8> %B) nounwind {
+; CHECK: test8:
+; CHECK: vpalignr $5
+  %C = shufflevector <32 x i8> %A, <32 x i8> %B, <32 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 35, i32 36, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48, i32 49, i32 50, i32 51, i32 52>
+  ret <32 x i8> %C
+}
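The lane-wise mask check above is easier to study outside the SelectionDAG machinery. Below is a minimal standalone sketch, not the committed code: the function name matchPalignr and its std::vector<int> interface are invented for this illustration, and it folds the patch's "must match lane 0" comparison into a single agreed-upon Shift value rather than comparing each lane against lane 0's mask elements. The mask convention follows LLVM IR shufflevector: -1 is undef, values in [0, NumElts) pick from the first source, and [NumElts, 2*NumElts) from the second.

```cpp
#include <cstdio>
#include <vector>

static bool isUndefOrInRange(int Val, int Low, int Hi) {
  return Val < 0 || (Val >= Low && Val < Hi);
}
static bool isUndefOrEqual(int Val, int CmpVal) {
  return Val < 0 || Val == CmpVal;
}

// Check whether Mask is a valid per-128-bit-lane PALIGNR and, if so,
// compute the byte immediate. VectorBits is 128 or 256; EltBytes is the
// element size in bytes.
static bool matchPalignr(const std::vector<int> &Mask, int VectorBits,
                         int EltBytes, unsigned &Imm) {
  int NumElts = (int)Mask.size();
  int NumLanes = VectorBits / 128;
  int NumLaneElts = NumElts / NumLanes;
  if (NumLaneElts == 2)
    return false; // 64-bit element shuffles are not handled with palignr.

  int Shift = -1; // element shift; every lane must agree on it
  for (int l = 0; l != NumElts; l += NumLaneElts) {
    // Find the first defined element of this lane.
    int i = 0;
    while (i != NumLaneElts && Mask[l + i] < 0)
      ++i;
    if (i == NumLaneElts)
      continue; // lane is all undef; any shift works for it

    // The element must come from this lane of one of the two sources.
    int Start = Mask[l + i];
    if (!isUndefOrInRange(Start, l, l + NumLaneElts) &&
        !isUndefOrInRange(Start, l + NumElts, l + NumElts + NumLaneElts))
      return false;
    // Fold second-source elements onto positions NumLaneElts..2*NumLaneElts-1
    // of a virtual concatenation of the two source lanes.
    if (Start >= NumElts)
      Start -= NumElts - NumLaneElts;
    int LaneShift = Start - l - i; // how far right this lane rotates
    if (LaneShift <= 0)
      return false; // PALIGNR only shifts right by a positive amount
    if (Shift >= 0 && Shift != LaneShift)
      return false; // lanes disagree; one immediate cannot express this
    Shift = LaneShift;

    // The remaining defined elements must continue the rotated sequence.
    for (++i; i != NumLaneElts; ++i) {
      int Idx = Mask[l + i];
      if (!isUndefOrInRange(Idx, l, l + NumLaneElts) &&
          !isUndefOrInRange(Idx, l + NumElts, l + NumElts + NumLaneElts))
        return false;
      if (Idx >= NumElts)
        Idx -= NumElts - NumLaneElts;
      if (!isUndefOrEqual(Idx, l + Shift + i))
        return false;
    }
  }
  if (Shift < 0)
    return false; // every lane undef: not a palignr
  Imm = (unsigned)(Shift * EltBytes);
  return true;
}

int main() {
  // The mask from test1 above: per lane, elements 1..3 of %A followed by
  // element 0 of %B's matching lane, i.e. a one-element rotate.
  std::vector<int> M = {1, 2, 3, 8, 5, 6, 7, 12};
  unsigned Imm;
  if (matchPalignr(M, 256, /*EltBytes=*/4, Imm))
    std::printf("vpalignr $%u\n", Imm); // prints "vpalignr $4"
  return 0;
}
```

The sketch also makes the operand swap in the TableGen patterns easier to see: PALIGNR reads its first register operand as the high half of the per-lane concatenation, so the X86PAlign node's second operand becomes the instruction's first, which is why the patterns emit (VPALIGNR256rr $src2, $src1).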