[x86] Rewrite the byte shift detection to not use boolean variables to

track state.

I didn't like this in the code review because the pattern tends to be
error prone, but I didn't see a clear way to rewrite it. Turns out that
there were bugs here, I found them when fuzz testing our shuffle
lowering for correctness on x86.

The core of the problem is that we need to consistently test all our
preconditions for the same directionality of shift and the same input
vector. Instead, formulate this as two predicates (one doesn't depend on
the input in any way), pass things like the directionality and input
vector as inputs, and loop over the alternatives.

This fixes a pattern of very rare miscompiles coming out of this code.
Turned up roughly 4 out of every 1 million v8 shuffles in my fuzz
testing. The new code is over half a million test runs with no failures
yet. I've also fuzzed every other function in the lowering code with
over 3.5 million test cases and not discovered any other miscompiles.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@229642 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Chandler Carruth 2015-02-18 07:13:48 +00:00
parent f781855732
commit c9520b48ae
2 changed files with 54 additions and 23 deletions

View File

@ -7888,40 +7888,39 @@ static SDValue lowerVectorShuffleAsByteShift(SDLoc DL, MVT VT, SDValue V1,
// [ 5, 6, 7, zz, zz, zz, zz, zz]
// [ -1, 5, 6, 7, zz, zz, zz, zz]
// [ 1, 2, -1, -1, -1, -1, zz, zz]
auto MatchByteShift = [&](int Shift) -> SDValue {
bool MatchLeft = true, MatchRight = true;
for (int l = 0; l < NumElts; l += NumLaneElts) {
for (int i = 0; i < Shift; ++i)
MatchLeft &= Zeroable[l + i];
for (int i = NumLaneElts - Shift; i < NumLaneElts; ++i)
MatchRight &= Zeroable[l + i];
}
if (!(MatchLeft || MatchRight))
return SDValue();
bool MatchV1 = true, MatchV2 = true;
auto CheckZeros = [&](int Shift, bool LeftShift) {
for (int l = 0; l < NumElts; l += NumLaneElts)
for (int i = 0; i < Shift; ++i)
if (!Zeroable[l + i + (LeftShift ? 0 : (NumLaneElts - Shift))])
return false;
return true;
};
auto MatchByteShift = [&](int Shift, bool LeftShift, SDValue V) {
for (int l = 0; l < NumElts; l += NumLaneElts) {
unsigned Pos = MatchLeft ? Shift + l : l;
unsigned Low = MatchLeft ? l : Shift + l;
unsigned Pos = LeftShift ? Shift + l : l;
unsigned Low = LeftShift ? l : Shift + l;
unsigned Len = NumLaneElts - Shift;
MatchV1 &= isSequentialOrUndefInRange(Mask, Pos, Len, Low);
MatchV2 &= isSequentialOrUndefInRange(Mask, Pos, Len, Low + NumElts);
if (!isSequentialOrUndefInRange(Mask, Pos, Len,
Low + (V == V1 ? 0 : NumElts)))
return SDValue();
}
if (!(MatchV1 || MatchV2))
return SDValue();
int ByteShift = Shift * Scale;
unsigned Op = MatchRight ? X86ISD::VSRLDQ : X86ISD::VSHLDQ;
SDValue V = MatchV1 ? V1 : V2;
unsigned Op = LeftShift ? X86ISD::VSHLDQ : X86ISD::VSRLDQ;
V = DAG.getNode(ISD::BITCAST, DL, ShiftVT, V);
V = DAG.getNode(Op, DL, ShiftVT, V,
DAG.getConstant(ByteShift, MVT::i8));
V = DAG.getNode(Op, DL, ShiftVT, V, DAG.getConstant(ByteShift, MVT::i8));
return DAG.getNode(ISD::BITCAST, DL, VT, V);
};
for (int Shift = 1; Shift < NumLaneElts; ++Shift)
if (SDValue S = MatchByteShift(Shift))
return S;
for (bool LeftShift : {true, false})
if (CheckZeros(Shift, LeftShift))
for (SDValue V : {V1, V2})
if (SDValue S = MatchByteShift(Shift, LeftShift, V))
return S;
// no match
return SDValue();

View File

@ -2146,3 +2146,35 @@ define <8 x i16> @shuffle_v8i16_0123456z(<8 x i16> %a) {
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_fu3ucc5u(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_fu3ucc5u:
; SSE2: # BB#0:
; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_fu3ucc5u:
; SSSE3: # BB#0:
; SSSE3-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
; SSSE3-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_fu3ucc5u:
; SSE41: # BB#0:
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6],xmm1[7]
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[14,15,14,15,6,7,6,7,8,9,8,9,10,11,14,15]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_fu3ucc5u:
; AVX: # BB#0:
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6],xmm1[7]
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,14,15,6,7,6,7,8,9,8,9,10,11,14,15]
; AVX-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 15, i32 undef, i32 3, i32 undef, i32 12, i32 12, i32 5, i32 undef>
ret <8 x i16> %shuffle
}