diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 4678340b6fb..09f0be0e246 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -9875,27 +9875,36 @@ static SDValue lower256BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
 static bool canWidenShuffleElements(ArrayRef<int> Mask,
                                     SmallVectorImpl<int> &WidenedMask) {
   for (int i = 0, Size = Mask.size(); i < Size; i += 2) {
-    // Check for any of the sentinel values (negative) and if they are the same,
-    // we can widen to that.
-    if (Mask[i] < 0 && Mask[i] == Mask[i + 1]) {
-      WidenedMask.push_back(Mask[i]);
+    // If both elements are undef, it's trivial.
+    if (Mask[i] == SM_SentinelUndef && Mask[i + 1] == SM_SentinelUndef) {
+      WidenedMask.push_back(SM_SentinelUndef);
       continue;
     }
 
     // Check for an undef mask and a mask value properly aligned to fit with
     // a pair of values. If we find such a case, use the non-undef mask's value.
-    if (Mask[i] == -1 && Mask[i + 1] >= 0 && Mask[i + 1] % 2 == 1) {
+    if (Mask[i] == SM_SentinelUndef && Mask[i + 1] >= 0 && Mask[i + 1] % 2 == 1) {
       WidenedMask.push_back(Mask[i + 1] / 2);
       continue;
     }
-    if (Mask[i + 1] == -1 && Mask[i] >= 0 && Mask[i] % 2 == 0) {
+    if (Mask[i + 1] == SM_SentinelUndef && Mask[i] >= 0 && Mask[i] % 2 == 0) {
       WidenedMask.push_back(Mask[i] / 2);
       continue;
     }
 
+    // When zeroing, we need to spread the zeroing across both lanes to widen.
+    if (Mask[i] == SM_SentinelZero || Mask[i + 1] == SM_SentinelZero) {
+      if ((Mask[i] == SM_SentinelZero || Mask[i] == SM_SentinelUndef) &&
+          (Mask[i + 1] == SM_SentinelZero || Mask[i + 1] == SM_SentinelUndef)) {
+        WidenedMask.push_back(SM_SentinelZero);
+        continue;
+      }
+      return false;
+    }
+
     // Finally check if the two mask values are adjacent and aligned with
     // a pair.
-    if (Mask[i] != -1 && Mask[i] % 2 == 0 && Mask[i] + 1 == Mask[i + 1]) {
+    if (Mask[i] != SM_SentinelUndef && Mask[i] % 2 == 0 && Mask[i] + 1 == Mask[i + 1]) {
       WidenedMask.push_back(Mask[i] / 2);
       continue;
     }
diff --git a/test/CodeGen/X86/vector-shuffle-128-v16.ll b/test/CodeGen/X86/vector-shuffle-128-v16.ll
index 21806489815..db7091485be 100644
--- a/test/CodeGen/X86/vector-shuffle-128-v16.ll
+++ b/test/CodeGen/X86/vector-shuffle-128-v16.ll
@@ -665,3 +665,40 @@
 entry:
   ret <16 x i8> %shuffle
 }
+
+define <16 x i8> @stress_test2(<16 x i8> %s.0.0, <16 x i8> %s.0.1, <16 x i8> %s.0.2) {
+; SSSE3-LABEL: @stress_test2
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufb {{.*}} # xmm0 = zero,zero,xmm0[2],zero,zero,zero,xmm0[11],zero,zero,xmm0[3,4,5],zero,zero,xmm0[15,5]
+; SSSE3-NEXT:    movdqa %xmm1, %[[X1:xmm[0-9]+]]
+; SSSE3-NEXT:    pshufb {{.*}} # [[X1]] = [[X1]][13,14],zero,[[X1]][0,10,5],zero,[[X1]][10,10],zero,zero,zero,[[X1]][14,12],zero,zero
+; SSSE3-NEXT:    por %xmm0, %[[X1]]
+; SSSE3-NEXT:    movdqa %xmm2, %xmm0
+; SSSE3-NEXT:    pshufb {{.*}} # xmm0 = zero,zero,zero,zero,zero,zero,xmm0[1,6,u,u,u,u,u,u,u,u]
+; SSSE3-NEXT:    movdqa %xmm1, %[[X2:xmm[0-9]+]]
+; SSSE3-NEXT:    pshufb {{.*}} # [[X2]] = [[X2]][1,12,5,9,1,5],zero,zero,[[X2]][u,u,u,u,u,u,u,u]
+; SSSE3-NEXT:    por %xmm0, %[[X2]]
+; SSSE3-NEXT:    pshufb {{.*}} # xmm1 = zero,zero,zero,xmm1[2],zero,zero,xmm1[6,15,u,u,u,u,u,u,u,u]
+; SSSE3-NEXT:    pshufb {{.*}} # xmm2 = xmm2[15,8,12],zero,xmm2[13,15],zero,zero,xmm2[u,u,u,u,u,u,u,u]
+; SSSE3-NEXT:    por %xmm1, %xmm2
+; SSSE3-NEXT:    punpcklbw {{.*}} # xmm2 = xmm2[0],[[X2]][0],xmm2[1],[[X2]][1],xmm2[2],[[X2]][2],xmm2[3],[[X2]][3],xmm2[4],[[X2]][4],xmm2[5],[[X2]][5],xmm2[6],[[X2]][6],xmm2[7],[[X2]][7]
+; SSSE3-NEXT:    movdqa %xmm2, %[[X3:xmm[0-9]+]]
+; SSSE3-NEXT:    pshufb {{.*}} # [[X3]] = [[X3]][6],zero,[[X3]][14,14],zero,zero,[[X3]][11,0,u,u,u,u,u,u,u,u]
+; SSSE3-NEXT:    movdqa %[[X1]], %xmm0
+; SSSE3-NEXT:    pshufb {{.*}} # xmm0 = zero,xmm0[12],zero,zero,xmm0[1,14],zero,zero,xmm0[u,u,u,u,u,u,u,u]
+; SSSE3-NEXT:    por %[[X3]], %xmm0
+; SSSE3-NEXT:    pshufb {{.*}} # xmm2 = zero,xmm2[u,2,3,u,u,u,u,u,u,u,u,u,u,u,u]
+; SSSE3-NEXT:    movdqa %[[X1]], %[[X3]]
+; SSSE3-NEXT:    pshufb {{.*}} # [[X3]] = [[X3]][3,u],zero,zero,[[X3]][u,u,u,u,u,u,u,u,u,u,u,u]
+; SSSE3-NEXT:    por %xmm2, %[[X3]]
+; SSSE3-NEXT:    pshufb {{.*}} # [[X1]] = [[X1]][1,4,10,13,u,u,u,u,u,u,u,u,u,u,u,u]
+; SSSE3-NEXT:    punpcklbw {{.*}} # [[X1]] = [[X1]][0],[[X3]][0],[[X1]][1],[[X3]][1],[[X1]][2],[[X3]][2],[[X1]][3],[[X3]][3],[[X1]][4],[[X3]][4],[[X1]][5],[[X3]][5],[[X1]][6],[[X3]][6],[[X1]][7],[[X3]][7]
+; SSSE3-NEXT:    punpcklbw {{.*}} # xmm0 = xmm0[0],[[X1]][0],xmm0[1],[[X1]][1],xmm0[2],[[X1]][2],xmm0[3],[[X1]][3],xmm0[4],[[X1]][4],xmm0[5],[[X1]][5],xmm0[6],[[X1]][6],xmm0[7],[[X1]][7]
+; SSSE3-NEXT:    retq
+entry:
+  %s.1.0 = shufflevector <16 x i8> %s.0.0, <16 x i8> %s.0.1, <16 x i32> 
+  %s.1.1 = shufflevector <16 x i8> %s.0.1, <16 x i8> %s.0.2, <16 x i32> 
+  %s.2.0 = shufflevector <16 x i8> %s.1.0, <16 x i8> %s.1.1, <16 x i32> 
+
+  ret <16 x i8> %s.2.0
+}
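For reference, the pair-widening rule the patch adds for zeroable lanes can be illustrated with a small standalone sketch. This is not the in-tree code: the function name widenShuffleMask, the sentinel constants, and the driver in main are assumptions made for illustration only, but the branch structure mirrors the patched canWidenShuffleElements: each adjacent pair of narrow mask elements collapses to one wide element, and a pair may widen to the zero sentinel only when both of its elements are zero or undef.

// Standalone sketch (assumed names/values, not the in-tree function): same
// pairing rules as the patched canWidenShuffleElements.
#include <cstdio>
#include <vector>

static const int SentinelUndef = -1; // assumed stand-in for SM_SentinelUndef
static const int SentinelZero = -2;  // assumed stand-in for SM_SentinelZero

static bool widenShuffleMask(const std::vector<int> &Mask,
                             std::vector<int> &Widened) {
  for (size_t i = 0; i + 1 < Mask.size(); i += 2) {
    int Lo = Mask[i], Hi = Mask[i + 1];
    if (Lo == SentinelUndef && Hi == SentinelUndef) {
      Widened.push_back(SentinelUndef);          // both undef: trivially widenable
    } else if (Lo == SentinelUndef && Hi >= 0 && Hi % 2 == 1) {
      Widened.push_back(Hi / 2);                 // undef low half, aligned high half
    } else if (Hi == SentinelUndef && Lo >= 0 && Lo % 2 == 0) {
      Widened.push_back(Lo / 2);                 // undef high half, aligned low half
    } else if (Lo == SentinelZero || Hi == SentinelZero) {
      // Zeroing only widens if the whole pair is zero/undef.
      if ((Lo == SentinelZero || Lo == SentinelUndef) &&
          (Hi == SentinelZero || Hi == SentinelUndef))
        Widened.push_back(SentinelZero);
      else
        return false;
    } else if (Lo >= 0 && Lo % 2 == 0 && Lo + 1 == Hi) {
      Widened.push_back(Lo / 2);                 // adjacent, pair-aligned elements
    } else {
      return false;
    }
  }
  return true;
}

int main() {
  // {zero, undef, 2, 3} widens to {zero, 1}; a pair mixing zero with a real
  // element (e.g. {zero, 5}) refuses to widen and the function returns false.
  std::vector<int> Widened;
  bool OK = widenShuffleMask({SentinelZero, SentinelUndef, 2, 3}, Widened);
  std::printf("widened: %d -> {%d, %d}\n", OK, Widened[0], Widened[1]);
  return 0;
}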