From 05a8a724e2c0729c7d0989a4b203dce616534fb9 Mon Sep 17 00:00:00 2001
From: Chandler Carruth
Date: Sun, 21 Sep 2014 09:35:25 +0000
Subject: [PATCH] [x86] Fix a helper to reflect that what we actually care
 about is 128-bit lane crossings, not 'half' crossings.

This came up in code review ages ago, but I hadn't really addressed it.
Also added some documentation for the helper.

No functionality changed.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218203 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 7e43524627a..b00709c2488 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -9106,13 +9106,16 @@ static SDValue lower128BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
   }
 }
 
-static bool isHalfCrossingShuffleMask(ArrayRef<int> Mask) {
+/// \brief Test whether there are elements crossing 128-bit lanes in this
+/// shuffle mask.
+///
+/// X86 divides up its shuffles into in-lane and cross-lane shuffle operations
+/// and we routinely test for these.
+static bool is128BitLaneCrossingShuffleMask(MVT VT, ArrayRef<int> Mask) {
+  int LaneSize = 128 / VT.getScalarSizeInBits();
   int Size = Mask.size();
-  for (int M : Mask.slice(0, Size / 2))
-    if (M >= 0 && (M % Size) >= Size / 2)
-      return true;
-  for (int M : Mask.slice(Size / 2, Size / 2))
-    if (M >= 0 && (M % Size) < Size / 2)
+  for (int i = 0; i < Size; ++i)
+    if (Mask[i] >= 0 && (Mask[i] % Size) / LaneSize != i / LaneSize)
       return true;
   return false;
 }
@@ -9200,7 +9203,7 @@ static SDValue lowerV4F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
   // shuffles aren't a problem and FP and int have the same patterns.
 
   // FIXME: We can handle these more cleverly than splitting for v4f64.
-  if (isHalfCrossingShuffleMask(Mask))
+  if (is128BitLaneCrossingShuffleMask(MVT::v4f64, Mask))
     return splitAndLower256BitVectorShuffle(Op, V1, V2, Subtarget, DAG);
 
   if (isSingleInputShuffleMask(Mask)) {
@@ -9281,7 +9284,7 @@ static SDValue lowerV4I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
   // FIXME: If we have AVX2, we should delegate to generic code as crossing
   // shuffles aren't a problem and FP and int have the same patterns.
 
-  if (isHalfCrossingShuffleMask(Mask))
+  if (is128BitLaneCrossingShuffleMask(MVT::v4i64, Mask))
     return splitAndLower256BitVectorShuffle(Op, V1, V2, Subtarget, DAG);
 
   // AVX1 doesn't provide any facilities for v4i64 shuffles, bitcast and
@@ -9306,7 +9309,7 @@ static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
   ArrayRef<int> Mask = SVOp->getMask();
   assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
 
-  if (isHalfCrossingShuffleMask(Mask) ||
+  if (is128BitLaneCrossingShuffleMask(MVT::v8f32, Mask) ||
       isSingleInputShuffleMask(Mask))
     return splitAndLower256BitVectorShuffle(Op, V1, V2, Subtarget, DAG);
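
The arithmetic behind the renamed predicate is easy to sanity-check outside of LLVM. The following is a minimal standalone sketch, not LLVM code: the helper name crosses128BitLane and the use of std::vector in place of ArrayRef are illustrative assumptions. It applies the same (Mask[i] % Size) / LaneSize != i / LaneSize test from the patch to a few v4f64-style masks, where LaneSize = 128 / 64 = 2 elements per 128-bit lane.

// Minimal sketch (not LLVM code) of the 128-bit lane-crossing test above.
#include <cassert>
#include <vector>

// Returns true if any mask element would have to move across a 128-bit lane.
// Negative mask entries are treated as "undef" and never count as crossing.
static bool crosses128BitLane(int ScalarSizeInBits, const std::vector<int> &Mask) {
  int LaneSize = 128 / ScalarSizeInBits;       // elements per 128-bit lane
  int Size = static_cast<int>(Mask.size());
  for (int i = 0; i < Size; ++i)
    // "% Size" folds two-input mask entries (Size..2*Size-1) back into a
    // single vector's numbering before comparing source and destination lanes.
    if (Mask[i] >= 0 && (Mask[i] % Size) / LaneSize != i / LaneSize)
      return true;
  return false;
}

int main() {
  // v4f64: 64-bit elements, so the lanes are {0,1} and {2,3}.
  assert(!crosses128BitLane(64, {1, 0, 3, 2}));  // swaps stay inside each lane
  assert(crosses128BitLane(64, {2, 3, 0, 1}));   // exchanges the two 128-bit lanes
  assert(!crosses128BitLane(64, {5, -1, 2, 7})); // second-input picks, all in-lane
  return 0;
}

Note how an entry that indexes the second input vector is reduced with % Size before the lane comparison, so it is judged by its lane within that vector rather than by its position in the concatenated pair.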