diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 75e05883b6b..e1f777b402a 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5770,21 +5770,11 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, DebugLoc dl = SVOp->getDebugLoc(); ArrayRef MaskVals = SVOp->getMask(); + bool V2IsUndef = V2.getOpcode() == ISD::UNDEF; + // If we have SSSE3, case 1 is generated when all result bytes come from // one of the inputs. Otherwise, case 2 is generated. If no SSSE3 is // present, fall back to case 3. - // FIXME: kill V2Only once shuffles are canonizalized by getNode. - bool V1Only = true; - bool V2Only = true; - for (unsigned i = 0; i < 16; ++i) { - int EltIdx = MaskVals[i]; - if (EltIdx < 0) - continue; - if (EltIdx < 16) - V2Only = false; - else - V1Only = false; - } // If SSSE3, use 1 pshufb instruction per vector with elements in the result. if (TLI.getSubtarget()->hasSSSE3()) { @@ -5796,23 +5786,16 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, // Otherwise, we have elements from both input vectors, and must zero out // elements that come from V2 in the first mask, and V1 in the second mask // so that we can OR them together. - bool TwoInputs = !(V1Only || V2Only); for (unsigned i = 0; i != 16; ++i) { int EltIdx = MaskVals[i]; - if (EltIdx < 0 || (TwoInputs && EltIdx >= 16)) { - pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8)); - continue; - } + if (EltIdx < 0 || EltIdx >= 16) + EltIdx = 0x80; pshufbMask.push_back(DAG.getConstant(EltIdx, MVT::i8)); } - // If all the elements are from V2, assign it to V1 and return after - // building the first pshufb. - if (V2Only) - V1 = V2; V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1, DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, &pshufbMask[0], 16)); - if (!TwoInputs) + if (V2IsUndef) return V1; // Calculate the shuffle mask for the second input, shuffle it, and @@ -5820,10 +5803,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, pshufbMask.clear(); for (unsigned i = 0; i != 16; ++i) { int EltIdx = MaskVals[i]; - if (EltIdx < 16) { - pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8)); - continue; - } + EltIdx = (EltIdx < 16) ? 0x80 : EltIdx - 16; pshufbMask.push_back(DAG.getConstant(EltIdx - 16, MVT::i8)); } V2 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V2, @@ -5837,7 +5817,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, // the 16 different words that comprise the two doublequadword input vectors. V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1); V2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2); - SDValue NewV = V2Only ? V2 : V1; + SDValue NewV = V1; for (int i = 0; i != 8; ++i) { int Elt0 = MaskVals[i*2]; int Elt1 = MaskVals[i*2+1]; @@ -5847,9 +5827,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, continue; // This word of the result is already in the correct place, skip it. - if (V1Only && (Elt0 == i*2) && (Elt1 == i*2+1)) - continue; - if (V2Only && (Elt0 == i*2+16) && (Elt1 == i*2+17)) + if ((Elt0 == i*2) && (Elt1 == i*2+1)) continue; SDValue Elt0Src = Elt0 < 16 ? V1 : V2; diff --git a/test/CodeGen/X86/sse3.ll b/test/CodeGen/X86/sse3.ll index 5ea1b4dff1c..48638b3b696 100644 --- a/test/CodeGen/X86/sse3.ll +++ b/test/CodeGen/X86/sse3.ll @@ -249,9 +249,10 @@ entry: ; X64: t16: ; X64: pextrw $8, %xmm0, %eax ; X64: pslldq $2, %xmm0 -; X64: movd %xmm0, %ecx -; X64: pextrw $1, %xmm0, %edx -; X64: pinsrw $0, %ecx, %xmm0 +; X64: pextrw $1, %xmm0, %ecx +; X64: movzbl %cl, %ecx +; X64: orl %eax, %ecx +; X64: pinsrw $1, %ecx, %xmm0 ; X64: ret } diff --git a/test/CodeGen/X86/vec_shuffle-35.ll b/test/CodeGen/X86/vec_shuffle-35.ll index 7f0fcb5969e..f5083b4b801 100644 --- a/test/CodeGen/X86/vec_shuffle-35.ll +++ b/test/CodeGen/X86/vec_shuffle-35.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=x86 -mcpu=yonah -stack-alignment=16 -o %t -; RUN: grep pextrw %t | count 13 -; RUN: grep pinsrw %t | count 14 +; RUN: grep pextrw %t | count 12 +; RUN: grep pinsrw %t | count 13 ; RUN: grep rolw %t | count 13 ; RUN: not grep esp %t ; RUN: not grep ebp %t